 # MENTAL HEALTH AND DIGITAL BEHAVIOUR

 Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the survey export into a DataFrame and preview its first rows.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this exact file exists.
csv_path = "C:\\Users\\Hp\\Downloads\\archive\\mental_health_digital_behavior_data.csv"
df = pd.read_csv(csv_path)
df.head()

The cell above imports a CSV file using the pandas library. The file contains information about social media usage and mental health scores.

In [None]:
# Preview the final rows of the frame to confirm the file was read to the end.
df.tail()

In [None]:
# Dimensions of the frame as (rows, columns).
df.shape

In [None]:
# Per-column dtype and non-null count summary.
df.info()

In [None]:
# Descriptive statistics for the columns of the frame.
df.describe()

# Checking Data Quality

In [None]:
# Count the missing values in each column (isna is the same check as isnull).
print(df.isna().sum())

In [None]:
# Number of rows that exactly repeat an earlier row.
duplicate_count = df.duplicated().sum()
print(duplicate_count)

# Basic Visualization

In [None]:
# Histogram of total daily screen time, drawn on an explicit Axes object.
fig, ax = plt.subplots()
ax.hist(df['daily_screen_time_min'], bins=20, color='blue', alpha=0.5)
ax.set_title('Distribution of Daily Screen Time (min)')
ax.set_xlabel('Daily Screen Time (min)')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Scatter of social media minutes (x) against total screen minutes (y).
fig, ax = plt.subplots()
ax.scatter(df["social_media_time_min"], df["daily_screen_time_min"], alpha=0.5)
ax.set_title('Social Media Time vs Daily Screen Time')
ax.set_xlabel('Social Media Time (min)')
ax.set_ylabel('Daily Screen Time (min)')
plt.show()

In [None]:
# Scatter of total screen minutes (x) against the focus score (y).
fig, ax = plt.subplots()
ax.scatter(df['daily_screen_time_min'], df['focus_score'], alpha=0.5)
ax.set_title("Screen Time vs Focus Score")
ax.set_xlabel("Daily Screen Time (min)")
ax.set_ylabel("Focus Score")
plt.show()



# Correlation Analysis

In [None]:
# Correlation matrix over the numeric columns only. DataFrame.corr() raises on
# text columns in pandas >= 2.0, so the frame is restricted explicitly.
corr_matrix = df.select_dtypes(include="number").corr()
print(corr_matrix)

# Draw the matrix as a colour-coded grid. vmin/vmax pin the colour scale to the
# full correlation range so the colour bar really spans -1 to +1 and the
# midpoint colour always means "no correlation".
fig, ax = plt.subplots(figsize=(8, 6))
cax = ax.matshow(corr_matrix, cmap="coolwarm", vmin=-1, vmax=1)

# Colour bar attached to this figure rather than to whichever figure is current.
fig.colorbar(cax)

# One tick per column, labelled with the column name on both axes.
corr_labels = corr_matrix.columns
corr_ticks = np.arange(len(corr_labels))
ax.set_xticks(corr_ticks)
ax.set_yticks(corr_ticks)
ax.set_xticklabels(corr_labels, rotation=90)
ax.set_yticklabels(corr_labels)

ax.set_title("Correlation Heatmap", pad=20)

plt.show()


Accessing columns using Pandas

In [None]:
# Column labels available for the selections that follow.
df.columns

In [None]:
# iloc selects by integer position: rows 0 through 20, first six columns.
first_rows = list(range(21))
df.iloc[first_rows, 0:6]

In [None]:
# Only the last expression of a cell is rendered, so each selection is passed
# to display() explicitly; otherwise the first one is computed and dropped.
# display() is provided by the notebook kernel without an import.
focus_columns = ["daily_screen_time_min", "focus_score"]

# Label-based slice: index labels 0 through 5 (loc includes the end label).
display(df.loc[0:5, focus_columns])

# Boolean mask: only the rows whose focus score is above 7.0.
display(df.loc[df["focus_score"] > 7.0, focus_columns])

In [None]:
# Binary label: 1 when focus_score is at least 7, otherwise 0.
df["focus_target"] = df["focus_score"].ge(7).astype(int)

# Class balance of the new label.
target_counts = df["focus_target"].value_counts()
print(target_counts)


In [None]:
import numpy as np

# Model inputs: three usage columns as a float matrix, plus the binary label.
features = ["notification_count", "daily_screen_time_min", "num_app_switches"]
x = df.loc[:, features].to_numpy(dtype=float)        # one row per sample, one column per feature
y = df.loc[:, "focus_target"].to_numpy(dtype=int)    # one label per sample


# Splitting Data into Train/Test Sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. stratify=y keeps the class ratio the
# same in both parts, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

# A bare expression in the middle of a cell is never rendered, so the split
# sizes are printed; a short preview replaces dumping the whole array.
print("train rows:", len(X_train), "| test rows:", len(X_test))
print(X_train[:5])

FEATURE SCALING: USING Z-SCORE STANDARDIZATION (subtract the training mean, divide by the training standard deviation)

In [None]:
# Per-feature statistics are taken from the training rows only and then reused
# for the test rows, so both parts are scaled with the same values.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
std = np.where(std == 0, 1.0, std)   # a constant feature would otherwise divide by zero

# Standardise each part: subtract the training mean, divide by the training std.
X_train_norm, X_test_norm = ((part - mean) / std for part in (X_train, X_test))

In [None]:
# Prepend a constant column of ones to each scaled matrix.
# NOTE(review): the model functions in this notebook also take a separate
# scalar b, so the weight on this column acts as a second intercept. Confirm
# that this is intended.
X_train_final = np.hstack([np.ones((X_train_norm.shape[0], 1)), X_train_norm])
X_test_final = np.hstack([np.ones((X_test_norm.shape[0], 1)), X_test_norm])

In [None]:
# Preview the first rows and the overall shape instead of printing every row.
print(X_train_final[:5])
print("shape:", X_train_final.shape)


NEXT STEPS: prediction, error calculation, loss function, cost function, and applying gradient descent.

In [None]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Args:
        z: scalar, sequence, or NumPy array of real values.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))). logaddexp(0, -z) evaluates
    # log(1 + exp(-z)) without ever forming exp(-z) on its own, so a large
    # negative z gives 0.0 instead of an overflow warning.
    return np.exp(-np.logaddexp(0.0, -z))

In [None]:
def compute_cost(X,Y,w,b):
    """Mean binary cross-entropy of a logistic model.

    Args:
        X: feature matrix with one row per sample.
        Y: 0/1 labels, one per row of ``X``.
        w: weight vector with one entry per column of ``X``.
        b: scalar bias added to every linear score.

    Returns:
        The average over samples of
        -[Y*log(s) + (1 - Y)*log(1 - s)], where s = 1 / (1 + exp(-z)) and
        z = X.w + b.
    """
    # Use the labels that were passed in, not a notebook-level variable.
    Y = np.asarray(Y, dtype=float)
    z = np.dot(X, w) + b
    # Algebraically, -[Y*log(s) + (1-Y)*log(1-s)] == log(1 + exp(z)) - Y*z.
    # This form never takes log(0), so the cost stays finite when a
    # prediction saturates at 0 or 1.
    per_sample_loss = np.logaddexp(0.0, z) - Y * z
    return np.mean(per_sample_loss)

In [None]:
# Sanity check: with all-zero parameters every linear score is 0, so every
# prediction is 0.5 and the cost should be ln(2), roughly 0.693.
w_temp = np.zeros(X_train_final.shape[1])
b_temp = 0.0
x = X_train_final
y = y_train
# Keep the value itself; assigning the result of print() would only store None.
initial_cost = compute_cost(x, y, w_temp, b_temp)
print(initial_cost)

In [None]:
def compute_gradient(x, y, w, b):
    """Gradient of the logistic-regression cost with respect to w and b.

    Args:
        x: 2-D array with one row per sample and one column per feature.
        y: labels, one per row of ``x``.
        w: weight vector with one entry per column of ``x``.
        b: scalar bias.

    Returns:
        (dj_db, dj_dw): the bias gradient (scalar) and the weight gradient
        (one entry per feature), both averaged over the samples.
    """
    m = x.shape[0]
    # Prediction minus label for every sample at once.
    error = sigmoid(np.dot(x, w) + b) - y
    # Column j of x dotted with the errors is the sum over samples of
    # error_i * x[i, j], which is what the per-sample, per-feature loop adds up.
    dj_dw = np.dot(x.T, error) / m
    dj_db = np.sum(error) / m
    return dj_db, dj_dw

In [None]:
# Evaluate the gradient once at the all-zero starting point.
x_temp, y_temp = X_train_final, y_train
w_temp = np.zeros(x_temp.shape[1])
b_temp = 0.0
dj_db, dj_dw = compute_gradient(x_temp, y_temp, w_temp, b_temp)
print(dj_db, dj_dw, sep="\n")

In [None]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters):
    """Run batch gradient descent and return the fitted (w, b).

    Args:
        x: feature matrix passed through to compute_gradient / compute_cost.
        y: labels passed through to compute_gradient / compute_cost.
        w_in: starting weight vector; it is copied, so the caller's array is
            left unchanged.
        b_in: starting bias.
        alpha: step size applied to both gradients.
        num_iters: number of update steps.

    Returns:
        Tuple (w, b) after ``num_iters`` updates. Progress is printed on every
        100th iteration, starting with iteration 0.
    """
    w, b = w_in.copy(), b_in
    for i in range(num_iters):
        dj_db, dj_dw = compute_gradient(x, y, w, b)
        # Both gradients were computed before either parameter moves, so the
        # order of these two steps does not matter.
        b -= alpha * dj_db
        w -= alpha * dj_dw
        if not i % 100:
            # The cost is measured after this step; the gradients shown are
            # the ones that produced it.
            cost = compute_cost(x, y, w, b)
            print(f"Iteration {i}: Cost {cost}, dj_db {dj_db}, dj_dw {dj_dw}")
    return w, b

In [None]:
# Train from an all-zero start on the training matrix.
# NOTE(review): x and y are rebound here from the raw feature arrays to the
# training split, so re-running the train/test split cell afterwards would
# split the training set again.
x, y = X_train_final, y_train
w_in = np.zeros(x.shape[1])
b_in = 0.0
alpha = 0.01        # learning rate
num_iters = 1000    # number of gradient steps
final_parameters = gradient_descent(x, y, w_in, b_in, alpha, num_iters)

In [None]:
# Unpack the fitted parameters and score the held-out rows.
w_final, b_final = final_parameters
test_scores = X_test_final @ w_final + b_final
probs = sigmoid(test_scores)

# Threshold the probabilities at 0.5 to get hard 0/1 labels.
y_pred = (probs >= 0.5).astype(int)

print("Predicted labels:", y_pred)



In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Compare the predictions with the held-out labels.
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)
