In [None]:
# Import necessary libraries
import pandas as pd
from google.colab import files
from scipy.stats import ttest_1samp, ttest_ind
import matplotlib.pyplot as plt
import seaborn as sns

# Upload the dataset
# files.upload() opens Colab's file picker; the returned mapping is keyed by
# the uploaded file name(s).
uploaded = files.upload()

# Fail with a clear message instead of an opaque IndexError when the upload
# dialog is cancelled and nothing comes back.
if not uploaded:
    raise ValueError("No file was uploaded; please upload a CSV dataset.")

# Load the dataset (only the first uploaded file is used)
file_name = next(iter(uploaded))  # Get the uploaded file's name
data = pd.read_csv(file_name)

# Display the first few rows and dataset info
print(data.head())
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
data.info()

# Step 1: Visualize data distributions
# One figure with two side-by-side panels: a histogram of app usage time on
# the left and a per-gender boxplot of screen-on time on the right.
usage_col = 'App Usage Time (min/day)'
screen_col = 'Screen On Time (hours/day)'
fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: histogram of App Usage Time with a KDE curve overlaid
sns.histplot(data[usage_col], kde=True, color='blue', ax=hist_ax)
hist_ax.set_title('Distribution of App Usage Time (min/day)')
hist_ax.set_xlabel(usage_col)
hist_ax.set_ylabel('Frequency')

# Right panel: boxplot of Screen On Time, one box per Gender value
sns.boxplot(x='Gender', y=screen_col, data=data, ax=box_ax)
box_ax.set_title('Screen On Time by Gender')
box_ax.set_xlabel('Gender')
box_ax.set_ylabel(screen_col)

fig.tight_layout()
plt.show()

# Step 2: One-sample t-test
# Null hypothesis (H0): the mean App Usage Time equals `population_mean`
# (300 minutes/day). ttest_1samp is called with its defaults, which in SciPy
# means a two-sided alternative.
app_usage = data['App Usage Time (min/day)']
population_mean = 300
t_stat_one_sample, p_value_one_sample = ttest_1samp(app_usage, population_mean)

# Visualization of one-sample t-test
# Histogram of the sample with two vertical reference lines: the hypothesised
# population mean (red) and the observed sample mean (blue).
plt.figure(figsize=(8, 6))
sns.histplot(app_usage, kde=True, color='green')
# The label is built from population_mean so the legend cannot go stale if
# the hypothesised value above is changed.
plt.axvline(x=population_mean, color='red', linestyle='--',
            label=f'Population Mean ({population_mean})')
plt.axvline(x=app_usage.mean(), color='blue', linestyle='--', label='Sample Mean')
plt.title('One-Sample T-Test: App Usage Time')
plt.xlabel('App Usage Time (min/day)')
plt.ylabel('Frequency')
plt.legend()
plt.show()

# Step 3: Two-sample t-test
# Hypothesis: The mean Screen On Time is different between Male and Female users
# (H0: the two group means are equal). Rows whose Gender is neither 'Male'
# nor 'Female' are excluded from both samples.
screen_on_time_male = data[data['Gender'] == 'Male']['Screen On Time (hours/day)']
screen_on_time_female = data[data['Gender'] == 'Female']['Screen On Time (hours/day)']
# NOTE: with its defaults ttest_ind runs Student's t-test, which assumes equal
# variances in both groups; pass equal_var=False for Welch's test if that
# assumption does not hold for this dataset.
t_stat_two_sample, p_value_two_sample = ttest_ind(screen_on_time_male, screen_on_time_female)

# Visualization of two-sample t-test
# Overlaid kernel density estimates of the two groups.
plt.figure(figsize=(8, 6))
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
sns.kdeplot(screen_on_time_male, label='Male', fill=True)
sns.kdeplot(screen_on_time_female, label='Female', fill=True)
plt.title('Two-Sample T-Test: Screen On Time')
plt.xlabel('Screen On Time (hours/day)')
plt.ylabel('Density')
plt.legend()
plt.show()

# Display results
# Each entry pairs a heading with the statistic and p-value computed earlier;
# every test is reported as a heading line followed by a single summary line.
results = (
    ("One-sample t-test:", t_stat_one_sample, p_value_one_sample),
    ("Two-sample t-test:", t_stat_two_sample, p_value_two_sample),
)
for heading, t_stat, p_value in results:
    print(heading)
    print(f"t-statistic: {t_stat}, p-value: {p_value}")
