In [2]:
import pandas as pd
import numpy as np
import os

In [4]:
df_exams = pd.read_csv('StudentsPerformance.csv')
df_exams.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


# 1 Selecting one column

## Syntax 1

In [5]:
# Select a column with [] (preferred way to select a column)
df_exams['gender']

0      female
1      female
2      female
3        male
4        male
        ...  
995    female
996      male
997    female
998    female
999    female
Name: gender, Length: 1000, dtype: object

In [6]:
# Check out the data type of a column
type(df_exams['gender'])

pandas.core.series.Series

In [7]:
# Series: Attributes and Methods
df_exams['gender'].index
df_exams['gender'].head()

0    female
1    female
2    female
3      male
4      male
Name: gender, dtype: object

## Syntax 2

In [8]:
# Select a column with .
df_exams.gender

0      female
1      female
2      female
3        male
4        male
        ...  
995    female
996      male
997    female
998    female
999    female
Name: gender, Length: 1000, dtype: object

In [18]:
# Select a column with . (pitfalls)
# df_exams.math score

In [10]:
# Select the same column using []
df_exams['math score']

0      72
1      69
2      90
3      47
4      76
       ..
995    88
996    62
997    59
998    68
999    77
Name: math score, Length: 1000, dtype: int64

## Selecting Multiple Columns

In [13]:
df_exams[['gender', 'math score', 'reading score']]

Unnamed: 0,gender,math score,reading score
0,female,72,72
1,female,69,90
2,female,90,95
3,male,47,57
4,male,76,78
...,...,...,...
995,female,88,99
996,male,62,55
997,female,59,71
998,female,68,78


In [19]:
df_female = df_exams[df_exams['gender']=='female']
df_female.shape

(518, 8)

In [22]:
df_female['math score'].sum()

32962

## Adding a new column

In [24]:
# Adding a new column
df_exams['language score'] = 70
df_exams

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,language score
0,female,group B,bachelor's degree,standard,none,72,72,74,70
1,female,group C,some college,standard,completed,69,90,88,70
2,female,group B,master's degree,standard,none,90,95,93,70
3,male,group A,associate's degree,free/reduced,none,47,57,44,70
4,male,group C,some college,standard,none,76,78,75,70
...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95,70
996,male,group C,high school,free/reduced,none,62,55,55,70
997,female,group C,high school,free/reduced,completed,59,71,65,70
998,female,group D,some college,standard,completed,68,78,77,70


## Adding a new column with an array

In [26]:
# Create an array of 1000 elements
language_score = np.arange(0,1000)

In [28]:
# Adding the columns to dataframe
df_exams['language score'] = language_score
df_exams

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,language score
0,female,group B,bachelor's degree,standard,none,72,72,74,0
1,female,group C,some college,standard,completed,69,90,88,1
2,female,group B,master's degree,standard,none,90,95,93,2
3,male,group A,associate's degree,free/reduced,none,47,57,44,3
4,male,group C,some college,standard,none,76,78,75,4
...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95,995
996,male,group C,high school,free/reduced,none,62,55,55,996
997,female,group C,high school,free/reduced,completed,59,71,65,997
998,female,group D,some college,standard,completed,68,78,77,998


In [33]:
# Create random numbers between 1 to 100
int_lang_score = np.random.randint(1,100,size=1000)

In [34]:
df_exams['language score'] = int_lang_score
df_exams

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,language score
0,female,group B,bachelor's degree,standard,none,72,72,74,51
1,female,group C,some college,standard,completed,69,90,88,3
2,female,group B,master's degree,standard,none,90,95,93,39
3,male,group A,associate's degree,free/reduced,none,47,57,44,56
4,male,group C,some college,standard,none,76,78,75,97
...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95,43
996,male,group C,high school,free/reduced,none,62,55,55,56
997,female,group C,high school,free/reduced,completed,59,71,65,80
998,female,group D,some college,standard,completed,68,78,77,20


In [35]:
# Creating random float numbers
np.random.uniform(1,100, size=1000)

array([16.97325219, 38.16307532, 91.42373141, 37.97957506, 73.30466751,
       16.06739691, 43.01082374, 60.55080597, 67.77500065, 61.83670282,
       34.02690516, 70.48253772, 79.40696371, 18.82913337, 29.62368786,
       71.38389787, 82.50395752, 28.82103508, 27.06190757, 95.75844271,
       28.50061534, 46.17978071, 30.62806816, 97.37218654, 11.33301376,
       51.14242792, 93.04497764, 53.34865094, 48.39167794, 75.33082393,
       49.74838184, 25.84588568, 82.77370021, 94.52640596, 54.05251797,
       16.95760394, 15.91312463, 54.1243453 , 23.48132592, 99.81710266,
       39.32427288, 10.19663584, 26.76900882, 78.56166715, 19.70310391,
       58.02691155, 57.81667904,  8.61014003, 77.03621829,  4.15342522,
       80.39405477, 69.05382318,  3.12084648, 82.88790054,  4.97892939,
       44.99729657, 84.15438011, 17.04897355,  5.94958843, 74.03626343,
       97.86093925, 84.59325947, 55.62459178, 75.37705381, 62.31352238,
       83.17285547, 26.81337269, 65.29391809, 35.47721755, 67.91