In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/teen-phone-addiction/teen_phone_addiction_dataset.csv


**load data**

In [2]:
data = pd.read_csv("/kaggle/input/teen-phone-addiction/teen_phone_addiction_dataset.csv")

**Data expliotation**

In [3]:
data.shape

(3000, 25)

In [4]:
print(data.info())
print(data.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 25 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   ID                      3000 non-null   int64  
 1   Name                    3000 non-null   object 
 2   Age                     3000 non-null   int64  
 3   Gender                  3000 non-null   object 
 4   Location                3000 non-null   object 
 5   School_Grade            3000 non-null   object 
 6   Daily_Usage_Hours       3000 non-null   float64
 7   Sleep_Hours             3000 non-null   float64
 8   Academic_Performance    3000 non-null   int64  
 9   Social_Interactions     3000 non-null   int64  
 10  Exercise_Hours          3000 non-null   float64
 11  Anxiety_Level           3000 non-null   int64  
 12  Depression_Level        3000 non-null   int64  
 13  Self_Esteem             3000 non-null   int64  
 14  Parental_Control        3000 non-null   

In [5]:
print(data.isnull().sum())

ID                        0
Name                      0
Age                       0
Gender                    0
Location                  0
School_Grade              0
Daily_Usage_Hours         0
Sleep_Hours               0
Academic_Performance      0
Social_Interactions       0
Exercise_Hours            0
Anxiety_Level             0
Depression_Level          0
Self_Esteem               0
Parental_Control          0
Screen_Time_Before_Bed    0
Phone_Checks_Per_Day      0
Apps_Used_Daily           0
Time_on_Social_Media      0
Time_on_Gaming            0
Time_on_Education         0
Phone_Usage_Purpose       0
Family_Communication      0
Weekend_Usage_Hours       0
Addiction_Level           0
dtype: int64


This is a cleaned data so no need to do data wrangling

**Step 1: Define target**

In [6]:
y = data['Addiction_Level']

**Step 2: Prepare features**

Drop target and unnecessary columns

In [7]:
X = data.drop(['Addiction_Level', 'ID', 'Name'], axis=1)

Automatically select categorical columns

In [8]:
#  Find categorical columns
categorical_cols = X.select_dtypes(include=["object"]).columns.tolist()

One-hot encode categorical columns

In [9]:
X_encoded = pd.get_dummies(X, columns=categorical_cols)
print(X_encoded.head())

   Age  Daily_Usage_Hours  Sleep_Hours  Academic_Performance  \
0   13                4.0          6.1                    78   
1   17                5.5          6.5                    70   
2   13                5.8          5.5                    93   
3   18                3.1          3.9                    78   
4   14                2.5          6.7                    56   

   Social_Interactions  Exercise_Hours  Anxiety_Level  Depression_Level  \
0                    5             0.1             10                 3   
1                    5             0.0              3                 7   
2                    8             0.8              2                 3   
3                    8             1.6              9                10   
4                    4             1.1              1                 5   

   Self_Esteem  Parental_Control  ...  School_Grade_11th  School_Grade_12th  \
0            8                 0  ...              False              False   
1     

**Split the Dataset**

In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)

# Step 1: Training the Model

In [11]:
from sklearn.linear_model import LinearRegression

# Create the model
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Step 2: Predict outcomes on the test set

In [12]:
y_pred = model.predict(X_test)
print(y_pred)

[ 3.19840130e+07  3.19840132e+07  3.19840154e+07  3.19840124e+07
  3.19840151e+07  3.19840128e+07  3.19840107e+07  3.19840139e+07
  3.19840150e+07  3.19840137e+07  3.19840122e+07  3.19840158e+07
  9.50003815e+00  3.19840137e+07  3.19840139e+07  3.19840128e+07
  3.19840101e+07  3.19840145e+07  3.19840117e+07  3.19840121e+07
  3.19840142e+07  3.19840113e+07  3.19840127e+07  3.19840118e+07
  3.19840127e+07  3.19840149e+07  3.19840145e+07  3.19840130e+07
  3.19840115e+07  3.19840124e+07  3.19840120e+07  3.19840166e+07
  3.19840130e+07  3.19840099e+07  8.67457199e+00  3.19840131e+07
  3.19840143e+07  3.19840123e+07  9.67180252e+00  3.19840130e+07
  3.19840119e+07  3.19840127e+07  3.19840128e+07  3.19840157e+07
  3.19840121e+07  4.92625660e+07  3.19840106e+07  3.19840143e+07
  3.19840156e+07  3.19840105e+07  7.25450516e+00  3.19840122e+07
  3.19840125e+07  3.19840130e+07  3.19840128e+07  3.19840130e+07
  3.19840142e+07  3.19840139e+07  1.00830841e+01  3.19840148e+07
  3.19840142e+07  3.19840

# Step 3: Evaluating the Model

In [13]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate evaluation metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"R² Score: {r2:.2f}")

Mean Squared Error: 507092676551864896.00
Root Mean Squared Error: 712104400.04
R² Score: -201282108186838080.00
