# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import ListedColormap
%matplotlib inline

import os

# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

## Data Loading & the Usuals

In [None]:
# Read the students-performance CSV into the DataFrame used by every later cell.
csv_path = '../input/students-performance-in-exams/StudentsPerformance.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
df.info()

In [None]:
# Summary statistics, transposed so that each described column becomes a row.
df.describe().T

# Exploratory Data Analysis

In [None]:
# Number of students in each race/ethnicity group.
sns.countplot(data=df, x='race/ethnicity', palette='viridis')

In [None]:
# Correlation heatmap of the numeric columns.
# The frame also holds string columns (gender, lunch, ...). On pandas >= 2.0
# `DataFrame.corr()` no longer drops those silently and raises instead, so the
# numeric columns are selected explicitly first; this works on older pandas too.
numeric_corr = df.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, vmin=-1, annot=True)

In [None]:
# One row of three count plots: gender, lunch type, and test-preparation status.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

# (column, palette) for each panel, left to right. Driving the plots from this
# table replaces three copy-pasted calls that differed only in these two values.
count_panels = [
    ('gender', 'coolwarm'),
    ('lunch', 'colorblind'),
    ('test preparation course', 'deep'),
]
for panel_ax, (column, palette_name) in zip(axes, count_panels):
    sns.countplot(ax=panel_ax, x=column, data=df, palette=palette_name)

In [None]:
# Three stacked histograms, one per subject score, split by whether the
# student completed the test-preparation course.
fig, axes = plt.subplots(3, 1, figsize=(12, 10))

# Same hue and palette for every panel so colours mean the same thing throughout.
prep_palette = ["silver", 'limegreen']
score_columns = ['math score', 'reading score', 'writing score']

for panel_ax, score_column in zip(axes, score_columns):
    sns.histplot(ax=panel_ax, x=score_column, data=df,
                 hue='test preparation course', palette=prep_palette)

### **Observation:** 
Students who **completed their test preparation performed better** than those who did not prepare.

In [None]:
# Pairwise scatter plots of the three subject scores, coloured by gender.
fig, axes = plt.subplots(3, 1, figsize=(8, 10))

gender_palette = ["pink", 'c']
# (x column, y column) for each panel, top to bottom.
score_pairs = [
    ('math score', 'reading score'),
    ('math score', 'writing score'),
    ('reading score', 'writing score'),
]
for panel_ax, (x_column, y_column) in zip(axes, score_pairs):
    sns.scatterplot(ax=panel_ax, x=x_column, y=y_column,
                    data=df, hue='gender', palette=gender_palette)

### **Observations:**
* From graphs 1 and 2, it is clear that boys performed better in maths.
* In graph 3, we can see that girls performed better in reading and writing.

In [None]:
# 3-D scatter of the three subject scores, one series per gender.
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111, projection = '3d')

x = df['math score']
y = df['reading score']
z = df['writing score']

ax.set_xlabel("math score")
ax.set_ylabel("reading score")
ax.set_zlabel("writing score")

# Plot each gender separately so that each gets its own colour and legend entry.
# The coordinates are passed as (x, y, z) = (math, reading, writing) so the data
# on each axis matches the axis label set above.
for s in df['gender'].unique():
    mask = df['gender'] == s
    ax.scatter(x[mask], y[mask], z[mask], label=s)

ax.legend()

In [None]:
# Math vs. reading score with marginal distributions, split by lunch type.
sns.jointplot(data=df, x='math score', y='reading score',
              hue='lunch', palette='deep')

### **Observation**:
Students with a standard lunch performed better than students who had a free/reduced lunch.

In [None]:
# Math vs. reading score with marginal distributions, split by test-preparation status.
prep_joint_palette = ['tomato', 'dodgerblue']
sns.jointplot(data=df, x='math score', y='reading score',
              hue='test preparation course', palette=prep_joint_palette)

## Adding overall Score and Grades column

In [None]:
def grader(x):
    """Map a numeric score to a letter grade.

    Bands (lower bound exclusive, upper bound inclusive):
    above 85 -> 'A', above 70 -> 'B', above 55 -> 'C', above 40 -> 'D'.
    Anything else, including values for which every comparison is false
    (such as NaN), yields 'E'.
    """
    # Cut-offs are checked from highest to lowest; the first one exceeded wins.
    bands = ((85, 'A'), (70, 'B'), (55, 'C'), (40, 'D'))
    for cutoff, letter in bands:
        if x > cutoff:
            return letter
    return 'E'

In [None]:
# Overall score is the mean of the three subject scores; the grade is then
# derived from it with grader().
score_total = df['math score'].add(df['writing score']).add(df['reading score'])
df['overall score'] = score_total.div(3)

overall_scores = df['overall score']
df['grade'] = overall_scores.apply(grader)


In [None]:
# Number of students per grade. Without an explicit order the bars follow the
# order in which grades first appear in the data, so the order is fixed to A-E
# to make the distribution readable.
grade_order = ['A', 'B', 'C', 'D', 'E']
sns.countplot(x='grade', data=df, order=grade_order)

In [None]:
# Distribution of the overall score (30 bins) with a KDE curve overlaid.
sns.histplot(data=df, x='overall score',
             bins=30, kde=True, color='darkblue')

In [None]:
# Overall score of every student, grouped by parental level of education.
edu_fig, edu_ax = plt.subplots(figsize=(10, 8))
sns.swarmplot(data=df, x='parental level of education',
              y='overall score', ax=edu_ax)

### **Observation:** 
Kids whose parents had a Master's degree performed slightly better than the rest, but the difference between the groups is small.

In [None]:
# Student counts per parental education level, split by test-preparation status.
prep_fig, prep_ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='parental level of education',
              hue='test preparation course', palette='deep', ax=prep_ax)

In [None]:
# Top: overall score distribution per race/ethnicity group.
# Bottom: reading vs. math score coloured by race/ethnicity group.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 10))
box_ax, scatter_ax = axes

sns.boxplot(data=df, x='race/ethnicity', y='overall score', ax=box_ax)

sns.scatterplot(data=df, x='reading score', y='math score',
                hue='race/ethnicity', ax=scatter_ax)

### ***Observation:*** 
* From the above two graphs, it is clearly visible that **race/ethnicity has no relation to the performance of the kids** in the exams.
* In the box plot, each ethnicity group performed very similarly (group A performed slightly worse than the other groups, possibly because group A also contains far fewer kids).