In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.metrics import confusion_matrix

In [None]:
# Load the raw mobile-addiction dataset from the notebook's working directory.
df = pd.read_csv(
    'mobile_addiction.csv',
    encoding='utf-8',
)

# Knowing The Data

In [None]:
# Dataset dimensions as a (rows, columns) tuple.
print((len(df.index), len(df.columns)))

In [None]:
# Column dtypes, non-null counts and memory usage.
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called on its own: wrapping it in print() emitted a stray "None".
df.info()

In [None]:
# Preview the first five rows of the dataset.
df.head(n=5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numerical columns.
df.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Summary (count, unique, top, freq) for the categorical / object columns.
df.describe(include=['object'])

In [None]:
# List the column labels exactly as loaded from the CSV.
df.columns

In [None]:
# Number of missing values in each column.
print(df.isna().sum())

In [None]:
# Drop the unnecessary index column ('Unnamed: 0').
# errors='ignore' keeps this cell safe to re-run: once the column is gone,
# a second execution is a no-op instead of raising KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Show the column labels again to confirm that 'Unnamed: 0' is no longer present.
df.columns

In [None]:
# Draw one interactive box plot per numerical column to eyeball outliers.
numeric_columns = df.select_dtypes(include='number').columns

for column in numeric_columns:
    fig = px.box(
        df,
        x=column,
        title=f'Box plot for {column}',
    )
    fig.show()


*There are almost no outliers in this dataset.*

In [None]:
# Features whose relationship with the `addicted` column is plotted below.
features = [
    'daily_screen_time', 'app_sessions', 'social_media_usage',
    'gaming_time', 'notifications', 'night_usage', 'age',
    'work_study_hours', 'stress_level', 'apps_installed'
]

# Ordered level names, lowest to highest. When fewer than five bins are
# usable, only the first `num_bins` names are used.
bin_labels = ['Very Low', 'Low', 'Medium', 'High', 'Very High']

for feature in features:
    binned_col = f'{feature}_binned'

    try:
        unique_vals = df[feature].nunique()

        if unique_vals >= 10:
            # Quantile binning for features with many distinct values.
            # Heavily tied values can make several quantile edges coincide;
            # duplicates='drop' merges them, so fewer than five bins may remain.
            bin_edges = pd.qcut(df[feature], q=5, retbins=True, duplicates='drop')[1]
            num_bins = len(bin_edges) - 1
            labels = bin_labels[:num_bins]
            # Cut on the de-duplicated edges themselves. Calling qcut a second
            # time with q=num_bins would recompute *different* quantiles and,
            # without duplicates='drop', could fail with "Bin edges must be
            # unique" and cause the feature to be skipped.
            # include_lowest=True mirrors qcut, which keeps the minimum value
            # inside the first bin.
            df[binned_col] = pd.cut(
                df[feature], bins=bin_edges, labels=labels, include_lowest=True
            )
        else:
            # Fallback: five equal-width bins for low-cardinality features.
            df[binned_col] = pd.cut(df[feature], bins=5, labels=bin_labels)

        # Count of rows per bin, split by addiction status.
        ax = sns.countplot(x=binned_col, hue='addicted', data=df)
        ax.set_title(f'Addiction Status by {feature.replace("_", " ").title()}')
        ax.set_xlabel(feature.replace('_', ' ').title())
        ax.set_ylabel('Count')
        plt.xticks(rotation=30)
        plt.tight_layout()
        plt.show()

    except (KeyError, TypeError, ValueError) as e:
        # Best effort: a missing column, non-numeric data or a binning failure
        # skips this feature only. Other exception types are unexpected and
        # are allowed to surface instead of being hidden.
        print(f"Skipping {feature} due to error: {e}")

I noticed that for social media usage, gaming time, and night usage, the number of addicted people drops as usage time increases, so I decided to plot the proportion of addicted people within each bin instead of the raw counts.


In [None]:
# Features to visualize
features = [
    'daily_screen_time', 'app_sessions', 'social_media_usage',
    'gaming_time', 'notifications', 'night_usage', 'age',
    'work_study_hours', 'stress_level', 'apps_installed'
]


def bin_into_levels(values, labels=('Very Low', 'Low', 'Medium', 'High', 'Very High'),
                    min_unique_for_quantiles=10):
    """Bin a numeric Series into ordered, named levels.

    Parameters
    ----------
    values : pandas.Series
        Numeric values to bin.
    labels : sequence of str
        Level names from lowest to highest; its length is the target bin count.
    min_unique_for_quantiles : int
        Series with at least this many distinct values are binned by
        quantiles; others are cut into equal-width bins.

    Returns
    -------
    pandas.Series
        Categorical Series aligned with ``values``.

    Raises
    ------
    ValueError / TypeError
        Propagated from ``pd.qcut`` / ``pd.cut`` when the data cannot be binned.
    """
    labels = list(labels)

    if values.nunique() >= min_unique_for_quantiles:
        # duplicates='drop' merges coinciding quantile edges, so fewer bins
        # than len(labels) may remain.
        bin_edges = pd.qcut(values, q=len(labels), retbins=True, duplicates='drop')[1]
        num_bins = len(bin_edges) - 1
        # Cut on the computed edges directly. A second qcut with q=num_bins
        # would recompute different quantiles and could fail on duplicate
        # edges. include_lowest=True mirrors qcut's handling of the minimum.
        return pd.cut(values, bins=bin_edges, labels=labels[:num_bins], include_lowest=True)

    # Equal-width binning for low-uniqueness features.
    return pd.cut(values, bins=len(labels), labels=labels)


def addiction_status_label(value):
    """Translate a raw `addicted` value into a legend label.

    Numeric/boolean values keep the original rule (0 -> 'Not Addicted',
    anything else -> 'Addicted').
    NOTE(review): the dtype of `addicted` is not visible in this notebook. If
    it holds text (e.g. 'not addicted'), a plain ``== 0`` test would label
    every column 'Addicted', so negative-looking strings are handled too.
    Confirm against the actual data.
    """
    if isinstance(value, str):
        text = value.strip().lower()
        is_negative = text in {'0', 'no', 'n', 'false'} or text.startswith(('not', 'non'))
        return 'Not Addicted' if is_negative else 'Addicted'
    return 'Not Addicted' if value == 0 else 'Addicted'


def addiction_percentages(frame, binned_col, status_col='addicted'):
    """Return, per bin, the percentage of rows in each addiction status.

    The result has one row per observed bin and one column per status; each
    row sums to 100. Rows whose bin is missing are ignored by the groupby.
    """
    # observed=True limits the result to bins that actually occur and
    # suppresses the pandas warning for categorical groupers.
    counts = (
        frame.groupby([binned_col, status_col], observed=True)
        .size()
        .unstack(fill_value=0)
    )
    percentages = counts.div(counts.sum(axis=1), axis=0) * 100
    percentages.columns = [addiction_status_label(c) for c in percentages.columns]
    return percentages


# Loop through each feature
for feature in features:
    binned_col = f'{feature}_binned'

    try:
        df[binned_col] = bin_into_levels(df[feature])
        pivot = addiction_percentages(df, binned_col)

        # Stacked bars: each bar shows the addicted / not-addicted split of one bin.
        ax = pivot.plot(kind='bar', stacked=True, figsize=(8, 6), colormap='Set2')
        ax.set_title(f'Addiction Percentage by {feature.replace("_", " ").title()}')
        ax.set_xlabel(feature.replace('_', ' ').title())
        ax.set_ylabel('Percentage')
        ax.legend(title='Addiction Status')
        plt.xticks(rotation=30)
        plt.tight_layout()
        plt.show()

    except (KeyError, TypeError, ValueError) as e:
        # Best effort: a missing column, non-numeric data or a binning failure
        # skips this feature only. Other exception types are unexpected and
        # are allowed to surface instead of being hidden.
        print(f"Skipping {feature} due to error: {e}")