In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import pandas as pd

In [None]:
# Read the smartphone-usage CSV into `df` and preview its first rows.
# The path is absolute; adjust it when running outside the original runtime.
df = pd.read_csv("/content/smartphone_data.csv")
df.head()  # head() shows five rows by default

In [None]:
# Distinct values found in the User_ID column, in order of first appearance.
df["User_ID"].unique()

In [None]:
# Print a structural summary of the frame: columns, non-null counts and dtypes.
df.info()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Count missing values per column (`isna` and `isnull` are the same method).
df.isna().sum()

In [None]:

# Columns fed to the scaler and, after scaling, to KMeans.
features = [
    'Daily_Phone_Hours',
    'Social_Media_Hours',
    'Work_Productivity_Score',
    'Sleep_Hours',
    'Stress_Level',
    'Weekend_Screen_Time_Hours',
]

In [None]:
# Standardise the selected columns before clustering.
# The fitted scaler stays available as `scaler`; the transformed matrix
# is what the clustering step consumes.
scaler = StandardScaler().fit(df[features])
X_scaled = scaler.transform(df[features])

In [None]:
# KMeans clustering on the standardised features.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# "auto" in version 1.4 (emitting a FutureWarning in 1.2-1.3), so leaving it
# implicit makes the resulting labels depend on the installed version.
# NOTE(review): the cluster count of 4 is hard-coded; no elbow or silhouette
# analysis is visible in this notebook to justify it.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)

# Store each row's cluster index alongside the original columns.
df['Cluster_Label'] = kmeans.fit_predict(X_scaled)

In [None]:
# Write the frame (now including Cluster_Label) to the current working
# directory, leaving the row index out of the file.
df.to_csv(path_or_buf="smartphone_data_clustered.csv", index=False)

In [None]:
from sklearn.linear_model import LinearRegression

# Load the dataset used for the regression steps.
# This rebinds `df`: the frame loaded and clustered in the earlier cells is
# no longer reachable under this name after this cell runs.
df = pd.read_csv("/content/smartphone_data_linear.csv")
df.head()  # head() shows five rows by default


In [None]:
# Predictor columns for the productivity regression.
features = [
    'Daily_Phone_Hours',
    'Social_Media_Hours',
    'Sleep_Hours',
    'Caffeine_Intake_Cups',
    'Weekend_Screen_Time_Hours',
]

# Design matrix and target vector, both taken from the same frame.
X = df.loc[:, features]
y = df.loc[:, 'Work_Productivity_Score']


In [None]:

# Fit an ordinary least-squares model on every row, then predict on those
# same rows, so the stored values are in-sample predictions.
model = LinearRegression().fit(X, y)
df['Predicted_Productivity'] = model.predict(X)

# Save the frame, including the new prediction column, without the row index.
df.to_csv("smartphone_data_pred.csv", index=False)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Columns included in the correlation matrix; their order here is the order
# of the rows/columns in the resulting matrix.
features = ['Daily_Phone_Hours', 'Social_Media_Hours', 'Sleep_Hours',
            'Stress_Level', 'Work_Productivity_Score',
            'Weekend_Screen_Time_Hours']

In [None]:

# Build, title, save and show the heatmap in ONE cell.
# With Jupyter's default inline backend, every open figure is rendered and
# closed when a cell finishes. Splitting figure creation, drawing, titling
# and saving across cells therefore discards the sized figure, draws the
# heatmap on a fresh default-size one, and makes savefig write an image
# that does not contain the heatmap.
corr = df[features].corr()

# Explicit figure/axes handles so every call below targets the same figure.
fig, ax = plt.subplots(figsize=(10, 8))

sns.heatmap(
    corr,
    annot=True,       # print the coefficient inside each cell
    fmt=".2f",
    cmap="coolwarm",
    linewidths=0.5,
    ax=ax,
)

ax.set_title("Feature Correlation Heatmap", fontsize=14)
fig.tight_layout()

# Save before plt.show(): the figure may be closed once it has been shown.
fig.savefig(
    "feature_correlation_heatmap.png",
    dpi=300,
    bbox_inches="tight",
)

plt.show()

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Predictor columns for the stress model (same set as the productivity
# regression earlier in the notebook).
features = ['Daily_Phone_Hours',
            'Social_Media_Hours',
            'Sleep_Hours',
            'Caffeine_Intake_Cups',
            'Weekend_Screen_Time_Hours']

X = df[features]
y = df['Stress_Level']

# Seed the forest so re-running the notebook reproduces the same predictions
# and the same exported CSV; 42 matches the seed used for KMeans above.
model = RandomForestRegressor(random_state=42)
model.fit(X, y)

# Predictions are made on the training rows themselves, so they are in-sample
# values and will look better than performance on unseen data.
df['Predicted_Stress'] = model.predict(X)

# `df` still carries any columns added by earlier cells (for example
# Predicted_Productivity when the cells run in order), so they are exported too.
df.to_csv("smartphone_data_pred_stress.csv", index=False)

In [None]:
# 2x2 correlation matrix between sleep duration and productivity score.
df.loc[:, ['Sleep_Hours', 'Work_Productivity_Score']].corr()

In [None]:
# 2x2 correlation matrix between daily phone use and stress level.
df.loc[:, ['Daily_Phone_Hours', 'Stress_Level']].corr()