# PREPROCESSING

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever darkgrid variant
# the installed version actually ships (plt.style.available lists them).
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
# Default size for every figure created later in the notebook.
plt.rcParams["figure.figsize"] = (20, 10)

In [None]:
# Location of the survey CSV inside the Kaggle input directory.
SURVEY_CSV = '/kaggle/input/covid19-and-its-impact-on-students/COVID-19 Survey Student Responses.csv'

# Read the raw responses and display the frame as the cell output.
df = pd.read_csv(SURVEY_CSV)
df

In [None]:
# Print column names, dtypes and non-null counts for the loaded frame.
df.info()

In [None]:
# Pairwise correlation of the numeric columns only. The survey frame also
# holds text columns, and pandas >= 2.0 raises when corr() meets them, so
# restrict to numeric dtypes first.
df.select_dtypes(include='number').corr()

In [None]:
# Give the age column a shorter, identifier-like name; later cells refer to
# it as 'Student_age'.
df = df.rename(columns={'Age of Subject': 'Student_age'})
df

## -> REMOVING NULL / REPLACING VALUES

In [None]:
# Show every row that has at least one missing value, once per row.
# Reducing the null mask with any(axis=1) gives a per-row boolean Series;
# indexing with the raw 2-D mask instead repeats a row for each null cell.
df[df.isnull().any(axis=1)]

In [None]:
# List the distinct values present in the rating column.
df['Rating of Online Class experience'].unique()

In [None]:
# List the distinct values present in the device/medium column.
df['Medium for online class'].unique()

In [None]:
# Replacement label to use for missing entries, per column.
nan_defaults = {
    'Rating of Online Class experience': 'Average',
    'Medium for online class': 'Smartphone or Laptop/Desktop',
}
for column_name, default_label in nan_defaults.items():
    df[column_name] = df[column_name].replace(np.nan, default_label)

In [None]:
# List the distinct raw values in the TV-time column before it is cleaned.
df['Time spent on TV'].unique()

In [None]:
# Free-text answers that mean "no TV time" are mapped to '0' so the whole
# column can be parsed as a number.
tv_zero_aliases = {'n': '0', 'N': '0', 'No tv': '0', ' ': '0', 0: '0'}

# Assign the result back instead of calling replace(..., inplace=True) on
# the column selection: the chained in-place form acts on a temporary under
# pandas Copy-on-Write and leaves df unchanged, so the float cast would fail.
df['Time spent on TV'] = df['Time spent on TV'].replace(tv_zero_aliases).astype('float')

In [None]:
# Merge the 'None ' variant (trailing space) into 'None'. The result is
# assigned back because a chained replace(..., inplace=True) on a column
# selection does not update df under pandas Copy-on-Write.
df['Prefered social media platform'] = df['Prefered social media platform'].replace({'None ': 'None'})

In [None]:
# List the distinct free-text answers before they are grouped.
df['What you miss the most'].unique()

In [None]:
# Canonical label -> free-text spellings that are collapsed into it.
miss_groups = {
    'All': [
        'All the above', 'All of the above ', 'everything', 'All above',
        'all of the above', 'ALL', 'all', 'All of the above', 'all of them',
        'All of them', 'All ',
    ],
    'Nothing': [
        'NOTHING', 'Nothing this is my usual life', 'To stay alone. ',
        'Nothing ', 'Nah, this is my usual lifestyle anyway, just being lazy....',
        'Normal life', 'My normal routine', 'nothing',
    ],
    'Friends/Relatives/Family': [
        'Only friends', 'Friends , relatives', 'relatives and friends', 'Family ',
        'The idea of being around fun loving people but this time has certainly made us all to reconnect (and fill the gap if any) with our families and relatives so it is fun but certainly we do miss hanging out with friends',
        'Family',
    ],
}

# Apply the groups one after another, in the order listed above.
missed = df['What you miss the most']
for canonical_label, spellings in miss_groups.items():
    missed = missed.replace(spellings, canonical_label)
df['What you miss the most'] = missed

In [None]:
# List the distinct stress-buster answers before they are grouped.
df['Stress busters'].unique()

In [None]:
# One-to-one renames to shorter labels. An explicit mapping is used rather
# than a list of one-element lists, since replace() matches scalar values
# and the nested form only works through accidental broadcasting.
stress_renames = {
    'Sleep': 'Sleeping',
    'Scrolling through social media': 'Social Media',
    'Reading books': 'Reading',
    'Talking to your relatives': 'Talking',
}
# Spellings that are collapsed into one shared label.
web_series_spellings = ['Web Series', 'Watching web series']
exercise_spellings = ['Exercising', 'Exercise', 'Gym', 'Workout ', 'Cardio',
                      'workout', 'working out and some physical activity']

stress = df['Stress busters'].replace(stress_renames)
stress = stress.replace(web_series_spellings, 'Watching Web-Series')
stress = stress.replace(exercise_spellings, 'Exercise/Gym')
df['Stress busters'] = stress

In [None]:
# Display the frame after the replacements above.
df

# REGION OF STUDENTS

In [None]:
# Tally respondents per region once, then split the tally into parallel
# label and count lists for the pie chart.
region_tally = df['Region of residence'].value_counts()
regions = region_tally.index.tolist()
students_in_regions = region_tally.tolist()
print(regions)
print(students_in_regions)

In [None]:
region_total = sum(students_in_regions)


def _region_wedge_label(pct):
    """Format a wedge as its percentage plus the matching head count."""
    return f'{pct:,.2f}%\n({pct * region_total/100:.0f})'


# The second wedge is pulled slightly out of the pie.
plt.pie(
    students_in_regions,
    explode=(0, 0.1),
    labels=regions,
    shadow=True,
    autopct=_region_wedge_label,
    textprops={'fontsize': 18, 'weight': 'bold'},
)
plt.show()

# AGE DISTRIBUTION OF STUDENTS

In [None]:
# Number of respondents at each age.
sns.countplot(x="Student_age", data=df, palette='Set1')
plt.xlabel('Age of Subject', weight='bold')
plt.ylabel('Number of Subjects', weight='bold')
# Pass a real boolean: the string 'True' only worked because any non-empty
# string is truthy (so 'False' would have enabled the grid as well).
plt.grid(True)

# RATING OF ONLINE CLASSES

In [None]:
# Tally the ratings once and expose labels and counts as parallel lists.
rating_tally = df['Rating of Online Class experience'].value_counts()
rating_of_online_class_experience = rating_tally.index.tolist()
count = rating_tally.tolist()
print(rating_of_online_class_experience)
print(count)

In [None]:
# Bar chart of how many students gave each rating, with the count written
# just above each bar.
fig, ax = plt.subplots()
ax.bar(rating_of_online_class_experience, count)

ax.set_title('Rating of Online Class experience', size=20)
ax.set_xlabel('Rating', size=20)
ax.set_ylabel('# Of Students', size=20)

for bar_index, bar_height in enumerate(count):
    # Nudge the label left of the bar centre and a little above the bar top.
    ax.text(bar_index - .15, bar_height + 3, bar_height,
            style='italic', fontsize=14, color='magenta')

ax.grid(True)

# DEVICES USED FOR ONLINE CLASSES

In [None]:
# Tally the devices once and expose labels and counts as parallel lists.
device_tally = df['Medium for online class'].value_counts()
devices = device_tally.index.tolist()
devices_used_by_students = device_tally.tolist()
print(devices)
print(devices_used_by_students)

In [None]:
device_total = sum(devices_used_by_students)


def _device_wedge_label(pct):
    """Format a wedge as its percentage plus the matching head count."""
    return f'{pct:.2f}%  ({pct * device_total/100:,.0f})'


# One explode offset per wedge; the five values assume five device categories.
plt.pie(
    devices_used_by_students,
    explode=(0.01, 0.02, 0.3, 0.4, 0.5),
    labels=devices,
    shadow=True,
    autopct=_device_wedge_label,
    textprops={'fontsize': 18, 'weight': 'bold'},
)
plt.show()

# WAS TIME UTILISED?

In [None]:
# Count of students per "Time utilized" answer, split by region.
sns.set_style('darkgrid')
g = sns.catplot(x="Time utilized", hue="Region of residence", kind="count", data=df)
# catplot creates its own figure, so the size is set on that figure.
# (The former `sns.set_size = ...` line only attached an unused attribute to
# the seaborn module and had no effect on the plot, so it is dropped.)
g.fig.set_figwidth(16)
g.fig.set_figheight(10)
plt.xlabel('Time Utilized', size=20)
plt.ylabel('# of students', size=20)
# Real boolean rather than the always-truthy string 'True'.
plt.grid(True)

In [None]:
# One violin panel per activity, grouped by the "Time utilized" answer.
# Panels are filled row by row in the order listed here.
activity_columns = [
    'Time spent on Online Class',
    'Time spent on self study',
    'Time spent on fitness',
    'Time spent on sleep',
    'Time spent on social media',
    'Time spent on TV',
]
fig, axes = plt.subplots(2, 3, figsize=(20, 12))
for panel, activity in zip(axes.flat, activity_columns):
    sns.violinplot(x='Time utilized', y=activity, data=df, ax=panel)
plt.show()



# TIME SPENT 


In [None]:
# Distribution of each time-spent column, drawn side by side.
time_columns = [
    'Time spent on Online Class',
    'Time spent on self study',
    'Time spent on fitness',
    'Time spent on sleep',
    'Time spent on social media',
    'Time spent on TV',
]
sns.violinplot(data=df[time_columns], palette='Set2')
plt.title('How students spent their time?', size=16)
plt.grid(True)

# PREFERRED SOCIAL MEDIA PLATFORMS


In [None]:
# Tally the platforms once and expose labels and counts as parallel lists.
platform_tally = df['Prefered social media platform'].value_counts()
social_media_platforms = platform_tally.index.tolist()
counts_ = platform_tally.tolist()
print(social_media_platforms)
print(counts_)

In [None]:
# Horizontal bars of platform popularity, each annotated with its share of
# all answers.
fig, ax = plt.subplots()
sns.barplot(x=counts_, y=social_media_platforms, palette='Set1', ax=ax)
ax.set_xlabel('# of students', size=16)

total_answers = sum(counts_)
for row, votes in enumerate(counts_):
    # Place the percentage just past the end of the bar.
    ax.text(votes + 3, row - .15, f'{votes*100/total_answers:.2f}%',
            style='italic', fontsize=14)
    

# WHAT STUDENTS MISS THE MOST?

In [None]:
# Ten most frequent "what you miss the most" answers, most common first.
fig, ax = plt.subplots()
top_answers = df['What you miss the most'].value_counts().index[:10]
sns.countplot(x='What you miss the most', data=df, order=top_answers,
              palette='Set2', ax=ax)
# The categories sit on the x-axis and the counts on the y-axis, so the
# labels are assigned accordingly (they were previously swapped).
ax.set_xlabel("What did they miss?", size=14, weight='bold')
ax.set_ylabel("# of students", size=14, weight='bold')
plt.show()

# FAVOURITE STRESS BUSTER



In [None]:
# Fifteen most frequent stress busters as horizontal bars, most common first.
top_stress_busters = df['Stress busters'].value_counts().index[:15]
sns.countplot(y="Stress busters", data=df, order=top_stress_busters)
plt.xlabel('# of Students', size=14, weight='bold')
plt.ylabel('Stress Buster', size=14, weight='bold')
plt.grid(True)

# CHANGE IN WEIGHT


In [None]:
# Display the frame again before the weight-change plots.
df

In [None]:
# Share of each weight-change answer; the tally is computed once and reused
# for both wedge sizes and wedge labels.
weight_tally = df['Change in your weight'].value_counts()
plt.pie(
    weight_tally.tolist(),
    labels=weight_tally.index.tolist(),
    textprops={'fontsize': 18},
)
plt.show()

In [None]:
# Meals per day (x) for each weight-change answer (y).
sns.boxplot(
    data=df,
    x='Number of meals per day',
    y='Change in your weight',
)
plt.show()

# DO STUDENTS FIND THEMSELVES MORE CONNECTED WITH THEIR FAMILY, FRIENDS, OR RELATIVES?

In [None]:
# The column name is the survey question verbatim, including its irregular
# spacing, so it is kept in one place.
connected_question = 'Do you find yourself more connected with your family, close friends , relatives  ?'
connected_tally = df[connected_question].value_counts()
connected = connected_tally.index.tolist()
counts = connected_tally.tolist()
counts

In [None]:
# Pie of the answers to the "more connected" question.
connected_text_style = {'fontsize': 18, 'weight': 'bold'}
plt.pie(counts, labels=connected, shadow=True, textprops=connected_text_style)
plt.show()

# HEALTH ISSUES AND VARIOUS FACTORS

In [None]:
# Six panels comparing time-spent columns between the health-issue groups,
# filled row by row.
health_column = 'Health issue during lockdown'
fig, axes = plt.subplots(2, 3, figsize=(20, 12))
first_panel, *violin_panels = axes.flat

# NOTE(review): the first panel is a boxplot while the other five are
# violins - confirm this difference is intentional.
sns.boxplot(x=health_column, y='Time spent on Online Class', data=df, ax=first_panel)

violin_columns = [
    'Time spent on self study',
    'Time spent on fitness',
    'Time spent on sleep',
    'Time spent on social media',
    'Time spent on TV',
]
for panel, activity in zip(violin_panels, violin_columns):
    sns.violinplot(x=health_column, y=activity, data=df, ax=panel)
plt.show()

In [None]:
# Age and meals-per-day distributions for each health-issue group, side by side.
fig, (age_panel, meals_panel) = plt.subplots(1, 2, figsize=(20, 10))
for panel, measure in ((age_panel, 'Student_age'), (meals_panel, 'Number of meals per day')):
    sns.violinplot(x='Health issue during lockdown', y=measure, data=df, ax=panel)
plt.show()