# Basics

## Import pandas and numpy

## Use the code below to generate a data frame for students

## Your data frame should include the student number, student name, shoe_size, side of classroom, and favorite number.

## Store your data frame in a variable named students

In [1]:
import numpy as np
import pandas as pd


students = ['Sally', 'Jane', 'Suzie', 'Billy', 'Ada', 'John', 'Thomas',
            'Marie', 'Albert', 'Richard', 'Isaac', 'Alan']

student_number = list(range(1, len(students) + 1))
shoe_sizes = np.random.choice(np.arange(6, 14, 0.5), len(students))
side_of_classroom = np.random.choice(['left', 'right'], len(students))
favorite_number = np.random.randint(1, 11, len(students))

In [2]:
students = pd.DataFrame({'Student_Number': student_number, 
                   'Student_Name': students, 
                   'Shoe_Size': shoe_sizes, 
                   'Fav_Number': favorite_number, 
                   'Side_of_Room' : side_of_classroom})

In [3]:
students

Unnamed: 0,Student_Number,Student_Name,Shoe_Size,Fav_Number,Side_of_Room
0,1,Sally,6.0,2,left
1,2,Jane,6.0,3,right
2,3,Suzie,10.0,9,left
3,4,Billy,11.5,8,left
4,5,Ada,10.5,2,right
5,6,John,13.5,1,right
6,7,Thomas,11.0,8,left
7,8,Marie,6.0,1,right
8,9,Albert,6.5,5,right
9,10,Richard,7.0,3,right


## Print out the shape of the data frame.

In [4]:
students.shape

(12, 5)

## Print out the names of the columns in the data frame.

In [5]:
students.columns

Index(['Student_Number', 'Student_Name', 'Shoe_Size', 'Fav_Number',
       'Side_of_Room'],
      dtype='object')

## Rename 2 of the columns in your data frame.

In [6]:
students.rename(columns={'Student_Number': 'Student_ID', 'Student_Name' : 'Name'}, inplace=True)

In [7]:
students

Unnamed: 0,Student_ID,Name,Shoe_Size,Fav_Number,Side_of_Room
0,1,Sally,6.0,2,left
1,2,Jane,6.0,3,right
2,3,Suzie,10.0,9,left
3,4,Billy,11.5,8,left
4,5,Ada,10.5,2,right
5,6,John,13.5,1,right
6,7,Thomas,11.0,8,left
7,8,Marie,6.0,1,right
8,9,Albert,6.5,5,right
9,10,Richard,7.0,3,right


## Create a new data frame based on the one you have. The new data frame should only have columns for shoe size and side of the classroom.

In [29]:
shoe_side = students[['Shoe_Size', 'Side_of_Room']]
shoe_side

Unnamed: 0,Shoe_Size,Side_of_Room
0,6.0,left
1,6.0,right
2,10.0,left
3,11.5,left
4,10.5,right
5,13.5,right
6,11.0,left
7,6.0,right
8,6.5,right
9,7.0,right


## Create a new data frame that has all of the columns, but only 5 rows.

In [30]:
students_5_rows = students.sample(5) #OR can use .head() and .tail()
students_5_rows

Unnamed: 0,Student_ID,Name,Shoe_Size,Fav_Number,Side_of_Room,third_quartile
3,4,Billy,11.5,8,left,True
6,7,Thomas,11.0,8,left,False
11,12,Alan,11.0,9,left,False
7,8,Marie,6.0,1,right,False
10,11,Isaac,7.5,2,left,False


## Create a new data frame that has only columns for favorite number and name, and only includes 7 rows.

In [10]:
students3 = students[['Fav_Number', 'Name']].sample(7)
students3

Unnamed: 0,Fav_Number,Name
3,8,Billy
10,2,Isaac
8,5,Albert
5,1,John
1,3,Jane
7,1,Marie
9,3,Richard


## Create a new column for the ratio of shoe size to the favorite number. Name this ss_to_fn

In [11]:
students.assign(ss_to_fn=shoe_sizes/favorite_number)

Unnamed: 0,Student_ID,Name,Shoe_Size,Fav_Number,Side_of_Room,ss_to_fn
0,1,Sally,6.0,2,left,3.0
1,2,Jane,6.0,3,right,2.0
2,3,Suzie,10.0,9,left,1.111111
3,4,Billy,11.5,8,left,1.4375
4,5,Ada,10.5,2,right,5.25
5,6,John,13.5,1,right,13.5
6,7,Thomas,11.0,8,left,1.375
7,8,Marie,6.0,1,right,6.0
8,9,Albert,6.5,5,right,1.3
9,10,Richard,7.0,3,right,2.333333


## Create a new column that contains the z-score for the shoe size.

In [12]:
m = shoe_sizes.mean()
std = shoe_sizes.std()
students.assign(z_score=(shoe_sizes-m)/std)

Unnamed: 0,Student_ID,Name,Shoe_Size,Fav_Number,Side_of_Room,z_score
0,1,Sally,6.0,2,left,-1.134506
1,2,Jane,6.0,3,right,-1.134506
2,3,Suzie,10.0,9,left,0.443937
3,4,Billy,11.5,8,left,1.035854
4,5,Ada,10.5,2,right,0.641243
5,6,John,13.5,1,right,1.825075
6,7,Thomas,11.0,8,left,0.838548
7,8,Marie,6.0,1,right,-1.134506
8,9,Albert,6.5,5,right,-0.937201
9,10,Richard,7.0,3,right,-0.739895


## Transform the side_of_the_classroom columns such that the values are either R or L.

In [33]:
students.Side_of_Room.apply(lambda side: 'R' if side == 'right' else 'L')
#OR students.Side_of_Room.str[0].str.upper()

0     L
1     R
2     L
3     L
4     R
5     R
6     L
7     R
8     R
9     R
10    L
11    L
Name: Side_of_Room, dtype: object

## Find the names of all the students that have a shoe size greater than the 3rd quartile of shoe sizes (You can use the .quantile method on a series for this)

In [27]:
students3=(students.assign(third_quartile=students.Shoe_Size > students.Shoe_Size.quantile(q=.75)))
students3

Unnamed: 0,Student_ID,Name,Shoe_Size,Fav_Number,Side_of_Room,third_quartile
0,1,Sally,6.0,2,left,False
1,2,Jane,6.0,3,right,False
2,3,Suzie,10.0,9,left,False
3,4,Billy,11.5,8,left,True
4,5,Ada,10.5,2,right,False
5,6,John,13.5,1,right,True
6,7,Thomas,11.0,8,left,False
7,8,Marie,6.0,1,right,False
8,9,Albert,6.5,5,right,False
9,10,Richard,7.0,3,right,False


## Find the names of all the students that have a shoe size less than the 1st quartile of shoe sizes

In [34]:
students4=(students.assign(first_quartile=students.Shoe_Size < students.Shoe_Size.quantile(q=.25)))
students4

#df[df.shoe_size < df.shoe_size.quantile(0.25)]

Unnamed: 0,Student_ID,Name,Shoe_Size,Fav_Number,Side_of_Room,third_quartile,first_quartile
0,1,Sally,6.0,2,left,False,True
1,2,Jane,6.0,3,right,False,True
2,3,Suzie,10.0,9,left,False,False
3,4,Billy,11.5,8,left,True,False
4,5,Ada,10.5,2,right,False,False
5,6,John,13.5,1,right,True,False
6,7,Thomas,11.0,8,left,False,False
7,8,Marie,6.0,1,right,False,True
8,9,Albert,6.5,5,right,False,False
9,10,Richard,7.0,3,right,False,False
