In [1]:
import joblib
import pandas as pd
import numpy as np
import os

In [2]:
# Directory that holds the serialized model artifacts.
save_dir = "saved_models"

# Every artifact the later cells load from save_dir.
required_files = [
    "gradient_boosting_addiction_classifier.pkl",
    "feature_scaler.pkl",
    "label_encoders.pkl",
    "model_info.pkl",
]

# Print each artifact name, prefixed with "MISSING: " when it is not on disk.
for artifact in required_files:
    artifact_path = f"{save_dir}/{artifact}"
    status_line = f"{artifact}" if os.path.exists(artifact_path) else f"MISSING: {artifact}"
    print(status_line)

gradient_boosting_addiction_classifier.pkl
feature_scaler.pkl
label_encoders.pkl
model_info.pkl


In [3]:
# joblib.load is pickle-based, so only load artifacts that come from a trusted source.
def _load_artifact(filename):
    """Deserialize one artifact stored under save_dir."""
    return joblib.load(f"{save_dir}/{filename}")


model = _load_artifact("gradient_boosting_addiction_classifier.pkl")
scaler = _load_artifact("feature_scaler.pkl")
encoders = _load_artifact("label_encoders.pkl")
feature_info = _load_artifact("model_info.pkl")

In [4]:
# Report the stored evaluation metrics and the number of model features.
# NOTE(review): the recorded output shows 1.000 for both metrics; perfect scores
# often indicate target leakage — confirm how these numbers were measured.
performance = feature_info['model_performance']
print(f"Accuracy: {performance['accuracy']:.3f}")
print(f"AUC Score: {performance['auc_score']:.3f}")
print(f"Features: {len(feature_info['feature_names'])}")

Accuracy: 1.000
AUC Score: 1.000
Features: 21


In [6]:
from dotenv import load_dotenv
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Read the key from the environment. Its value is never printed; only its
# absence is reported, so a misconfiguration is visible here rather than as an
# opaque request error in a later cell.
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    print("WARNING: OPENAI_API_KEY is not set; OpenAI requests in later cells are expected to fail.")


In [7]:
import openai
import json

In [8]:
# Keys every generated profile must contain; later cells index them directly.
_PROFILE_KEYS = (
    "Age", "Gender", "School_Grade", "Daily_Usage_Hours", "Sleep_Hours",
    "Academic_Performance", "Social_Interactions", "Exercise_Hours",
    "Anxiety_Level", "Depression_Level", "Self_Esteem", "Parental_Control",
    "Screen_Time_Before_Bed", "Phone_Checks_Per_Day", "Apps_Used_Daily",
    "Time_on_Social_Media", "Time_on_Gaming", "Time_on_Education",
    "Phone_Usage_Purpose", "Family_Communication", "Weekend_Usage_Hours",
)


def generate_teen_profile(profile_type="random", llm_model="gpt-3.5-turbo", temperature=0.7):
    """Ask the OpenAI chat API for one synthetic teenager profile.

    Args:
        profile_type: One of 'high_risk', 'low_risk', 'edge_case' or 'random';
            selects the instruction sent to the model.
        llm_model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        A dict containing at least every key in ``_PROFILE_KEYS``, or ``None``
        when the profile type is unknown, the request fails, the reply holds
        no parseable JSON object, or the object lacks required keys. A short
        diagnostic is printed in each failure case; the function does not raise.
    """
    prompts = {
        'high_risk': """Generate a realistic profile for a teenager with HIGH phone addiction risk. 
        Include high anxiety/depression, poor sleep, excessive phone usage, poor academics, low social interaction.
        Return ONLY a JSON object with these exact keys (no extra text):""",
        
        'low_risk': """Generate a realistic profile for a healthy teenager with LOW phone addiction risk.
        Include good mental health, adequate sleep, moderate phone usage, good academics, active social life.
        Return ONLY a JSON object with these exact keys (no extra text):""",
        
        'edge_case': """Generate an unusual but realistic teenager profile that might be challenging to classify.
        Maybe high phone usage but good mental health, or low usage but high anxiety.
        Return ONLY a JSON object with these exact keys (no extra text):""",
        
        'random': """Generate a realistic teenager profile with random but believable characteristics.
        Return ONLY a JSON object with these exact keys (no extra text):"""
    }
    
    feature_template = """
    {
        "Age": [integer 13-19],
        "Gender": ["Female", "Male", or "Other"],
        "School_Grade": ["7th", "8th", "9th", "10th", "11th", "12th"],
        "Daily_Usage_Hours": [float 0.5-12.0],
        "Sleep_Hours": [float 4.0-10.0],
        "Academic_Performance": [float 20.0-100.0],
        "Social_Interactions": [float 0.0-10.0],
        "Exercise_Hours": [float 0.0-5.0],
        "Anxiety_Level": [float 1.0-10.0],
        "Depression_Level": [float 1.0-10.0],
        "Self_Esteem": [float 1.0-10.0],
        "Parental_Control": [float 1.0-10.0],
        "Screen_Time_Before_Bed": [float 0.0-4.0],
        "Phone_Checks_Per_Day": [integer 10-300],
        "Apps_Used_Daily": [integer 3-30],
        "Time_on_Social_Media": [float 0.0-8.0],
        "Time_on_Gaming": [float 0.0-6.0],
        "Time_on_Education": [float 0.0-4.0],
        "Phone_Usage_Purpose": ["Browsing", "Education", "Gaming", "Other", "Social Media"],
        "Family_Communication": [float 1.0-10.0],
        "Weekend_Usage_Hours": [float 1.0-15.0]
    }
    """

    # Reject unknown (or non-string) profile types up front with a readable message.
    prompt = prompts.get(profile_type) if isinstance(profile_type, str) else None
    if prompt is None:
        print(f"Error generating profile: unknown profile_type {profile_type!r}; expected one of {sorted(prompts)}")
        return None

    try:
        response = openai.chat.completions.create(
            model=llm_model,
            messages=[
                {"role": "system", "content": "You are a teen psychology expert creating realistic profiles for research."},
                {"role": "user", "content": prompt + feature_template}
            ],
            temperature=temperature,
            max_tokens=500
        )
        # The message content can be None; treat that as an empty reply.
        content = (response.choices[0].message.content or "").strip()
    except Exception as e:
        # Deliberately broad: any API/transport failure becomes a None result.
        print(f"Error generating profile: {e}")
        return None

    # Keep only the outermost {...} span in case the model added extra text.
    start, end = content.find('{'), content.rfind('}')
    if start == -1 or end < start:
        print(f"Failed to parse JSON from: {content[:100]}...")
        return None

    try:
        profile = json.loads(content[start:end + 1])
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        print(f"Error generating profile: {e}")
        return None

    # A partial profile would only fail later with a KeyError; reject it here.
    missing = [key for key in _PROFILE_KEYS if key not in profile]
    if missing:
        print(f"Generated profile is missing required keys: {missing}")
        return None

    return profile

In [9]:
# Smoke-test the generator with one "random" profile and echo every field.
test_profile = generate_teen_profile("random")
if not test_profile:
    print("Profile generation failed")
else:
    print("AI Profile Generator works!")
    print("Sample profile generated:")
    for field_name, field_value in test_profile.items():
        print(f"   {field_name}: {field_value}")

AI Profile Generator works!
Sample profile generated:
   Age: 16
   Gender: Female
   School_Grade: 11th
   Daily_Usage_Hours: 7.5
   Sleep_Hours: 7.5
   Academic_Performance: 85.5
   Social_Interactions: 6.2
   Exercise_Hours: 2.5
   Anxiety_Level: 6.8
   Depression_Level: 4.2
   Self_Esteem: 7.5
   Parental_Control: 5.0
   Screen_Time_Before_Bed: 1.5
   Phone_Checks_Per_Day: 70
   Apps_Used_Daily: 20
   Time_on_Social_Media: 4.0
   Time_on_Gaming: 3.0
   Time_on_Education: 2.5
   Phone_Usage_Purpose: Social Media
   Family_Communication: 8.0
   Weekend_Usage_Hours: 12.5


In [10]:
def predict_teen_addiction(profile_dict):
    """Score a single profile with the loaded classifier.

    The profile becomes a one-row DataFrame, the categorical columns are
    label-encoded with the matching entry of ``encoders``, and the columns are
    selected in the order given by ``feature_info['feature_names']``.

    Args:
        profile_dict: Mapping of feature name to value for one teenager.

    Returns:
        ``(risk_probability, risk_class)``: the second column of
        ``model.predict_proba`` for the row (presumably the probability of the
        addicted class — confirm against the model's class order) and the
        label from ``model.predict``.

    NOTE(review): the ``scaler`` loaded with the model is not applied here —
    confirm the classifier was trained on unscaled features.
    """
    frame = pd.DataFrame([profile_dict])

    for column in ('Gender', 'School_Grade', 'Phone_Usage_Purpose'):
        if column not in frame.columns or column not in encoders:
            continue
        try:
            frame[column] = encoders[column].transform(frame[column])
        except ValueError:
            # Category not known to the encoder: report it and fall back to code 0.
            print(f"Unknown category in {column}: {frame[column].iloc[0]}")
            frame[column] = 0

    features = frame[feature_info['feature_names']]
    positive_probability = model.predict_proba(features)[0][1]
    predicted_label = model.predict(features)[0]

    return positive_probability, predicted_label

# Generate one AI profile per requested type, score it with the classifier,
# and collect the outcomes in test_results for the summary and later cells.
profile_types = ['high_risk', 'low_risk', 'edge_case', 'random', 'random']
test_results = []

for i, profile_type in enumerate(profile_types, 1):
    print(f"Profile #{i} ({profile_type.upper()}):")

    # AI profile
    ai_profile = generate_teen_profile(profile_type)
    if not ai_profile:
        print("Failed to generate profile")
        continue

    risk_prob, risk_class = predict_teen_addiction(ai_profile)
    test_results.append({
        'profile_id': i,
        'type': profile_type,
        'risk_probability': risk_prob,
        'risk_class': risk_class,
        'risk_level': 'HIGH' if risk_prob > 0.5 else 'LOW',
        'profile_data': ai_profile
    })

    # key info
    print(f"{ai_profile['Age']}y {ai_profile['Gender']}, {ai_profile['School_Grade']}")
    print(f"Usage: {ai_profile['Daily_Usage_Hours']}h/day, {ai_profile['Phone_Checks_Per_Day']} checks")
    print(f"Mental: Anxiety {ai_profile['Anxiety_Level']}/10, Depression {ai_profile['Depression_Level']}/10")
    print(f"Sleep: {ai_profile['Sleep_Hours']}h, Screen before bed: {ai_profile['Screen_Time_Before_Bed']}h")
    print(f"MODEL PREDICTION: {risk_prob:.1%} risk ({'ADDICTED' if risk_class == 1 else 'NON-ADDICTED'})")

    if profile_type == 'high_risk' and risk_prob < 0.5:
        print("INTERESTING: High-risk profile classified as low risk")
    elif profile_type == 'low_risk' and risk_prob > 0.5:
        print("INTERESTING: Low-risk profile classified as high risk")
    elif profile_type == 'edge_case':
        print("EDGE CASE: Perfect for testing model boundaries")

# Summary analysis
print("TEST RESULTS SUMMARY:")
print("=" * 50)

if test_results:
    results_df = pd.DataFrame([{
        'Profile_ID': r['profile_id'],
        'Type': r['type'],
        'Risk_Probability': r['risk_probability'],
        'Risk_Level': r['risk_level']
    } for r in test_results])

    print(results_df)

    high_risk_count = sum(1 for r in test_results if r['risk_probability'] > 0.5)
    print("CLASSIFICATION BREAKDOWN:")
    print(f"High Risk: {high_risk_count}/{len(test_results)} profiles")
    print(f"Low Risk: {len(test_results) - high_risk_count}/{len(test_results)} profiles")

    # Only high_risk / low_risk profiles carry an expected label. Some or all of
    # them may be absent if generation failed, so guard the division.
    labelled_results = [r for r in test_results if r['type'] in ('high_risk', 'low_risk')]
    if labelled_results:
        # Compare against risk_level so the 0.5 boundary is treated the same way
        # here as in the per-profile classification above.
        expected_vs_actual = sum(
            1 for r in labelled_results
            if r['risk_level'] == ('HIGH' if r['type'] == 'high_risk' else 'LOW')
        )
        accuracy_on_ai = expected_vs_actual / len(labelled_results)
        # Printed even when it is 0%, since a total mismatch is worth seeing.
        print(f"Model Logic Check: {accuracy_on_ai:.1%} accuracy on expected vs actual")
    else:
        print("Model Logic Check: skipped (no high_risk/low_risk profiles were generated)")

Profile #1 (HIGH_RISK):
16y Female, 11th
Usage: 8.5h/day, 200 checks
Mental: Anxiety 8.0/10, Depression 9.0/10
Sleep: 5.0h, Screen before bed: 3.0h
MODEL PREDICTION: 100.0% risk (ADDICTED)
Profile #2 (LOW_RISK):
16y Female, 11th
Usage: 3.5h/day, 50 checks
Mental: Anxiety 3.0/10, Depression 2.0/10
Sleep: 7.5h, Screen before bed: 1.5h
MODEL PREDICTION: 1.7% risk (NON-ADDICTED)
Profile #3 (EDGE_CASE):
16y Other, 11th
Usage: 6.5h/day, 150 checks
Mental: Anxiety 2.5/10, Depression 3.0/10
Sleep: 7.5h, Screen before bed: 2.0h
MODEL PREDICTION: 76.5% risk (ADDICTED)
EDGE CASE: Perfect for testing model boundaries
Profile #4 (RANDOM):
16y Female, 11th
Usage: 8.5h/day, 120 checks
Mental: Anxiety 6.5/10, Depression 4.0/10
Sleep: 7.5h, Screen before bed: 1.5h
MODEL PREDICTION: 70.5% risk (ADDICTED)
Profile #5 (RANDOM):
16y Male, 11th
Usage: 6.5h/day, 120 checks
Mental: Anxiety 4.8/10, Depression 3.2/10
Sleep: 7.5h, Screen before bed: 2.3h
MODEL PREDICTION: 73.2% risk (ADDICTED)
TEST RESULTS SUMMAR

In [11]:
def get_ai_recommendations(profile_data, risk_probability):
    """Build the list of advice strings triggered by a profile.

    Threshold rules are checked in a fixed order (mental health, sleep, usage,
    academic/social), followed by one optional tip keyed on
    ``Phone_Usage_Purpose``.

    Args:
        profile_data: Mapping holding the profile fields read below; a missing
            field raises ``KeyError``.
        risk_probability: Accepted for the caller's convenience; not used in
            the rules.

    Returns:
        List of recommendation strings in rule order; empty when no rule fires.
    """
    # (field, fires at-or-above the cutoff? (False = at-or-below), cutoff, advice)
    threshold_rules = (
        # Mental Health Priority
        ('Anxiety_Level', True, 7,
         "HIGH ANXIETY: Download Headspace/Calm, practice 5-minute breathing exercises daily"),
        ('Depression_Level', True, 7,
         "DEPRESSION SUPPORT: Talk to school counselor, maintain social connections, consider therapy"),
        # Sleep Optimization
        ('Sleep_Hours', False, 6,
         "SLEEP CRISIS: Set 10pm bedtime, create phone-free bedroom after 9pm"),
        ('Screen_Time_Before_Bed', True, 2,
         "DIGITAL SUNSET: No screens 2 hours before bed, use blue light filters after sunset"),
        # Usage Control
        ('Phone_Checks_Per_Day', True, 100,
         "NOTIFICATION DETOX: Turn off all non-essential notifications, use Focus modes"),
        ('Daily_Usage_Hours', True, 6,
         "USAGE LIMITS: Set 4-hour daily limit, use app timers, try 1-hour phone-free periods"),
        # Academic/Social Support
        ('Academic_Performance', False, 70,
         "ACADEMIC BOOST: Use Pomodoro technique, create phone-free study zones"),
        ('Social_Interactions', False, 4,
         "SOCIAL REVIVAL: Plan 3 face-to-face activities weekly, join school clubs"),
    )

    advice = []
    for field, at_or_above, cutoff, message in threshold_rules:
        reading = profile_data[field]
        if at_or_above:
            if reading >= cutoff:
                advice.append(message)
        elif reading <= cutoff:
            advice.append(message)

    # Personalized based on primary usage
    purpose = profile_data['Phone_Usage_Purpose']
    if purpose == 'Social Media':
        advice.append("SOCIAL MEDIA DETOX: Unfollow accounts that cause comparison, limit to 1 hour daily")
    elif purpose == 'Gaming':
        advice.append("GAMING BALANCE: Set gaming timers, find offline hobbies, no gaming before bed")

    return advice

# Test recommendations 
for i, result in enumerate(test_results, 1):
    profile_data = result['profile_data']
    risk_prob = result['risk_probability']
    
    print(f"AI TEEN #{i} ({result['type'].upper()}) - {risk_prob:.1%} RISK:")
    print(f"Profile: {profile_data['Age']}y {profile_data['Gender']}, {profile_data['Daily_Usage_Hours']}h usage")
    
    recommendations = get_ai_recommendations(profile_data, risk_prob)
    
    if recommendations:
        print(f"PERSONALIZED RECOMMENDATIONS:")
        for j, rec in enumerate(recommendations, 1):
            print(f"  {j}. {rec}")
    else:
        print("NO RECOMMENDATIONS NEEDED - Healthy digital habits!")

total_recs = 0
for result in test_results:
    profile_data = result['profile_data']
    recommendations = get_ai_recommendations(profile_data, result['risk_probability'])
    total_recs += len(recommendations)

    has_mental_health_rec = any('ANXIETY' in rec or 'DEPRESSION' in rec for rec in recommendations)
    has_sleep_rec = any('SLEEP' in rec or 'SUNSET' in rec for rec in recommendations)
    has_usage_rec = any('USAGE' in rec or 'NOTIFICATION' in rec for rec in recommendations)
    
    print(f"Teen #{result['profile_id']}: {len(recommendations)} recommendations")
    print(f"  Mental Health: {'✅' if has_mental_health_rec else '❌'}")
    print(f"  Sleep: {'✅' if has_sleep_rec else '❌'}")
    print(f"  Usage: {'✅' if has_usage_rec else '❌'}")

print(f"TOTAL RECOMMENDATIONS GENERATED: {total_recs}")
print(f"Average per teen: {total_recs/len(test_results):.1f}")

AI TEEN #1 (HIGH_RISK) - 100.0% RISK:
Profile: 16y Female, 8.5h usage
PERSONALIZED RECOMMENDATIONS:
  1. HIGH ANXIETY: Download Headspace/Calm, practice 5-minute breathing exercises daily
  2. DEPRESSION SUPPORT: Talk to school counselor, maintain social connections, consider therapy
  3. SLEEP CRISIS: Set 10pm bedtime, create phone-free bedroom after 9pm
  4. DIGITAL SUNSET: No screens 2 hours before bed, use blue light filters after sunset
  5. NOTIFICATION DETOX: Turn off all non-essential notifications, use Focus modes
  6. USAGE LIMITS: Set 4-hour daily limit, use app timers, try 1-hour phone-free periods
  7. ACADEMIC BOOST: Use Pomodoro technique, create phone-free study zones
  8. SOCIAL REVIVAL: Plan 3 face-to-face activities weekly, join school clubs
  9. SOCIAL MEDIA DETOX: Unfollow accounts that cause comparison, limit to 1 hour daily
AI TEEN #2 (LOW_RISK) - 1.7% RISK:
Profile: 16y Female, 3.5h usage
NO RECOMMENDATIONS NEEDED - Healthy digital habits!
AI TEEN #3 (EDGE_CASE)

In [None]:
# Source of the Flask backend written to app.py below.
# NOTE(review): the generated app loads `scaler` but never applies it — confirm
# the model was trained on unscaled features.
flask_app_code = '''
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
import joblib
import pandas as pd
import numpy as np
import os

app = Flask(__name__)
CORS(app)

# Load model components
model = joblib.load("saved_models/gradient_boosting_addiction_classifier.pkl")
scaler = joblib.load("saved_models/feature_scaler.pkl")
encoders = joblib.load("saved_models/label_encoders.pkl")
feature_info = joblib.load("saved_models/model_info.pkl")

def predict_addiction_risk(profile_data):
    """Predict addiction risk and generate recommendations"""
    try:
        # Convert to DataFrame
        df = pd.DataFrame([profile_data])
        
        # Apply label encoders
        categorical_features = ['Gender', 'School_Grade', 'Phone_Usage_Purpose']
        for cat_feature in categorical_features:
            if cat_feature in df.columns and cat_feature in encoders:
                df[cat_feature] = encoders[cat_feature].transform(df[cat_feature])
        
        # Ensure correct feature order
        df = df[feature_info['feature_names']]
        
        # Predict
        risk_probability = float(model.predict_proba(df)[0][1])
        risk_class = int(model.predict(df)[0])
        
        # Get feature importance for this prediction
        feature_importance = model.feature_importances_
        feature_names = feature_info['feature_names']
        
        importance_data = [
            {"feature": name, "importance": float(importance)} 
            for name, importance in zip(feature_names, feature_importance)
        ]
        importance_data.sort(key=lambda x: x['importance'], reverse=True)
        
        # Generate recommendations
        recommendations = generate_recommendations(profile_data, risk_probability)
        
        return {
            'risk_probability': risk_probability,
            'risk_class': risk_class,
            'risk_level': 'HIGH' if risk_probability > 0.5 else 'LOW',
            'feature_importance': importance_data[:10],  # Top 10 features
            'recommendations': recommendations,
            'success': True
        }
        
    except Exception as e:
        return {'success': False, 'error': str(e)}

def generate_recommendations(profile_data, risk_probability):
    """Generate personalized recommendations"""
    recommendations = []
    
    # Mental Health
    if profile_data['Anxiety_Level'] >= 7:
        recommendations.append({
            'category': 'Mental Health',
            'icon': '🧠',
            'title': 'Anxiety Management',
            'description': 'Try meditation apps like Headspace, practice deep breathing exercises',
            'priority': 'high'
        })
    
    if profile_data['Depression_Level'] >= 7:
        recommendations.append({
            'category': 'Mental Health',
            'icon': '💙',
            'title': 'Mood Support',
            'description': 'Consider talking to a counselor, maintain social connections',
            'priority': 'high'
        })
    
    # Sleep
    if profile_data['Sleep_Hours'] <= 6:
        recommendations.append({
            'category': 'Sleep',
            'icon': '😴',
            'title': 'Sleep Improvement',
            'description': 'Aim for 8+ hours, create bedtime routine',
            'priority': 'medium'
        })
    
    if profile_data['Screen_Time_Before_Bed'] >= 1.5:
        recommendations.append({
            'category': 'Sleep',
            'icon': '🌙',
            'title': 'Digital Sunset',
            'description': 'No screens 1 hour before bed, use blue light filters',
            'priority': 'medium'
        })
    
    # Usage Control
    if profile_data['Phone_Checks_Per_Day'] >= 100:
        recommendations.append({
            'category': 'Usage',
            'icon': '🔔',
            'title': 'Notification Control',
            'description': 'Turn off non-essential notifications, use Do Not Disturb',
            'priority': 'high'
        })
    
    if profile_data['Daily_Usage_Hours'] >= 6:
        recommendations.append({
            'category': 'Usage',
            'icon': '⏰',
            'title': 'Usage Limits',
            'description': 'Set app timers, try phone-free hours',
            'priority': 'medium'
        })
    
    return recommendations

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/api/predict', methods=['POST'])
def predict():
    # silent=True returns None for a missing or malformed JSON body instead of raising
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400
    try:
        result = predict_addiction_risk(data)
        return jsonify(result)
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})

@app.route('/api/model_info')
def model_info():
    return jsonify({
        'accuracy': feature_info['model_performance']['accuracy'],
        'auc_score': feature_info['model_performance']['auc_score'],
        'total_features': len(feature_info['feature_names']),
        'model_type': 'Gradient Boosting Classifier'
    })

if __name__ == '__main__':
    # Debug mode exposes an interactive debugger; enable it only on request (FLASK_DEBUG=1)
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')
'''

# The source contains emoji, so write it as UTF-8 explicitly instead of relying
# on the platform's default encoding.
with open('app.py', 'w', encoding='utf-8') as f:
    f.write(flask_app_code)

# Pinned dependencies for the generated Flask backend. Every pin must be a
# valid "name==version" specifier or pip rejects the whole file.
requirements = '''
Flask==2.3.3
flask-cors==4.0.0
joblib==1.3.2
pandas==2.0.3
numpy==1.24.3
scikit-learn==1.3.0
'''

with open('requirements.txt', 'w', encoding='utf-8') as f:
    f.write(requirements)

print("Flask backend created: app.py")
print("Requirements file created: requirements.txt")
print("Ready for frontend development!")

Flask backend created: app.py
Requirements file created: requirements.txt
Ready for frontend development!
