In [None]:
# Import the libraries used in this notebook: numpy, pandas, seaborn and matplotlib.
import numpy as np
import pandas as pd
import seaborn as sns
sns.set()

import matplotlib.pyplot as plt
import os
import warnings
warnings.filterwarnings("ignore")

# Graphics in retina format are more sharp and legible
%config InlineBackend.figure_format = 'retina'


In [None]:
# Load the student exam results from the CSV file into a DataFrame.
# NOTE(review): the relative path looks like a Kaggle input directory - confirm it exists where this runs.
data = pd.read_csv('../input/students-performance-in-exams/StudentsPerformance.csv')
data.head() # preview the first 5 rows (the default for DataFrame.head)

Here is the description of our features:

|  Name  | Value Type | Statistical Type |
|---         |---       |--- 
| **gender** | String | Categorical/Binary |
| **race/ethnicity** | String | Categorical/Nominal |
| **parental level of education** |String | Categorical/Ordinal |
| **lunch** | String | Categorical/Binary |
| **test preparation course** | String | Categorical/Binary |
| **math score** | Numerical | Quantitative |
| **reading score** | Numerical | Quantitative |
| **writing score** | Numerical | Quantitative|



In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

In [None]:
# Per column: True when the column holds at least one missing value.
data.isna().any(axis=0)

In [None]:
# Per column: how many values are missing.
data.isna().sum(axis=0)

In [None]:
# Print each column name on its own line.
for col in data.columns:
    print(col)

In [None]:
# Normalise the headers: title-case each name and swap spaces for underscores
# (e.g. 'math score' becomes 'Math_Score').
data.columns = data.columns.map(lambda name: name.title().replace(' ', '_'))

# Print the renamed headers, one per line.
for col in data.columns:
    print(col)

We will determine whether each student passed each exam by using a `passmark` as the indicator of success, and then categorize each student's overall performance across all exams with a `GPA`.

**We will set the minimum mark required to pass an exam to `40`.**

In [None]:
# Pass-mark threshold that each exam score is compared against.
passmark = 40

In [None]:
# Summary statistics again, so the score quartiles can be compared with passmark.
data.describe()

As we saw, fewer than `25%` of all students scored below the pass mark.


**How many students passed the Math exam?**

In [None]:
# Flag each student as passed ("P") or failed ("F") in Math.
# passmark is the minimum passing score, so a score equal to it counts as a pass.
data['Pass_Math_Exam'] = (data['Math_Score'] >= passmark).map({True: "P", False: "F"})

# Number of students in each outcome.
data['Pass_Math_Exam'].value_counts()

In [None]:
# Count of students per Pass_Math_Exam value ("P" or "F").
sns.countplot(data=data, x= 'Pass_Math_Exam');


In [None]:
# Math pass/fail counts, grouped by parental level of education.
sns.countplot(data=data,x= 'Parental_Level_Of_Education', hue= 'Pass_Math_Exam')
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation= 45);


**How many students passed the Reading exam?**

In [None]:
# Flag each student as passed ("P") or failed ("F") in Reading.
# passmark is the minimum passing score, so a score equal to it counts as a pass.
data['Pass_Reading_Exam'] = (data['Reading_Score'] >= passmark).map({True: "P", False: "F"})

# Number of students in each outcome.
data['Pass_Reading_Exam'].value_counts()


In [None]:
# Count of students per Pass_Reading_Exam value ("P" or "F").
sns.countplot(data=data, x= 'Pass_Reading_Exam');


In [None]:
# Reading pass/fail counts, grouped by parental level of education.
sns.countplot(data=data,x= 'Parental_Level_Of_Education', hue= 'Pass_Reading_Exam')
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation= 45);


**How many students passed the Writing exam?**

In [None]:
# Flag each student as passed ("P") or failed ("F") in Writing.
# passmark is the minimum passing score, so a score equal to it counts as a pass.
data['Pass_Writing_Exam'] = (data['Writing_Score'] >= passmark).map({True: "P", False: "F"})

# Number of students in each outcome.
data['Pass_Writing_Exam'].value_counts()


In [None]:
# Count of students per Pass_Writing_Exam value ("P" or "F").
sns.countplot(data=data, x= 'Pass_Writing_Exam');


In [None]:
# Writing pass/fail counts, grouped by parental level of education.
sns.countplot(data=data,x= 'Parental_Level_Of_Education', hue= 'Pass_Writing_Exam')
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation= 45);


**How many students passed all exams?**

In [None]:
# A student passes overall ("P") only when ALL three exams were passed;
# failing any single exam gives "F". Computed column-wise rather than row by row.
pass_columns = ['Pass_Math_Exam', 'Pass_Reading_Exam', 'Pass_Writing_Exam']
data['Overall_Passed'] = (
    data[pass_columns]
    .eq('P')          # True where the individual exam was passed
    .all(axis=1)      # True only when every exam in the row was passed
    .map({True: 'P', False: 'F'})
)

# Number of students in each overall outcome.
data['Overall_Passed'].value_counts()


In [None]:
# Overall pass/fail counts for each parental level of education.
sns.countplot(data= data, hue= 'Overall_Passed',
             x= 'Parental_Level_Of_Education')
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation= 45);


In [None]:
# Contingency table: after the transpose, rows are Overall_Passed values and
# columns are the parental levels of education.
pd.crosstab(data['Parental_Level_Of_Education'], data['Overall_Passed']).T

**GPA Of Students** 

In [None]:
# Total of the three exam scores for each student.
data['Total_Marks'] = (
    data['Math_Score']
    .add(data['Reading_Score'])
    .add(data['Writing_Score'])
)
# Mean score across the three exams.
data['Percentage'] = data['Total_Marks'].div(3.0)
data.head()

**Assigning The GPA**

|  GPA  | Percentage |
|-------|------------|
| **A** | > 80 |
| **B** | > 70 |
| **C** | > 60 |
| **D** | > 50 |
| **E** | > 40 |
| **F** | ≤ 40 |


In [None]:
def GetGrade(Percentage):
    """Map a percentage score to a letter grade from 'A' to 'F'.

    A grade is awarded only when the percentage is strictly greater than its
    cut-off, so exactly 80 yields 'B' and exactly 40 yields 'F'.

    Parameters
    ----------
    Percentage : number
        The score to grade.

    Returns
    -------
    str
        One of 'A', 'B', 'C', 'D', 'E' or 'F'.
    """
    # Cut-offs listed from highest to lowest; the first one exceeded wins.
    cutoffs = ((80, 'A'), (70, 'B'), (60, 'C'), (50, 'D'), (40, 'E'))
    for lower_bound, letter in cutoffs:
        if Percentage > lower_bound:
            return letter
    # Nothing exceeded: 40 or below.
    return 'F'

In [None]:
# Grade every student from the Percentage column. Series.map applies GetGrade
# element-wise, avoiding a row-wise DataFrame.apply for a single-column transform.
data['GPA'] = data['Percentage'].map(GetGrade)
data.head()

In [None]:
# Number of students per grade, shown as a one-column DataFrame.
data['GPA'].value_counts().to_frame()

In [None]:
# Fixed A-to-F ordering for the x-axis; reused by the GPA plots.
GPA_Order = list("ABCDEF")
# Count of students for each grade, drawn in GPA_Order.
sns.catplot(data= data, x= 'GPA',order= GPA_Order, kind= 'count');
plt.title('Number Of Students/GPA');

In [None]:
# Grade counts split by parental level of education (one hue per level).
sns.catplot(data= data, hue= 'Parental_Level_Of_Education',
             x= 'GPA', kind='count', order= GPA_Order)
# Rotate the x tick labels by 90 degrees.
plt.xticks(rotation= 90);

As we saw here, when the parents (`Parental_Level_Of_Education`) hold an advanced degree such as a `Bachelor's` or `Master's`, their children tend to perform better and are more likely to succeed in their exams. This is a **positive correlation**.


**If you have any questions, I am happy to answer them, and I would appreciate any feedback to help me keep going.**