# Python Basics and Pandas

## 1. Lists

**Example 1.1.** Create an empty list named `list1`.

In [1]:
list1 

In [2]:
list1 

**Example 1.2.** Append the first 20 even numbers to `list1`.

In [17]:
list1 = [i for i in range(1,41) if i % 2 == 0]

In [22]:
list1 = []
for i in range(1,41):
    if i % 2 == 0:
        list1.append(i)
list1

[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40]

## 2. Dictionaries

**Example 2.1.** Create and empty dictionary named `d`.

In [37]:
d = {}

**Example 2.2.** Add the keys `name`, `age`, and `birthdate` with the values Mahmoud, 50, and 05/05/1970.

In [38]:
d["name"] = "Mary"
d["age"] = 50
d["birthdate"] = "05/05/1970"
d

{'name': 'Mary', 'age': 50, 'birthdate': '05/05/1970'}

**Example 2.3.** Add another set of keys and values to your dictionary `d`.

# 3. Lambda Expressions

**Example 3.1.** Write a function that will return the square of a number plus 1.

In [43]:
def f(x):
    return x**2 + 1

f(2)

5

In [45]:
g = lambda x: x**2 + 1
g(2)

5

**Example 3.2.** Sort the authors by last name using a lambda function as the key.

In [1]:
scifi_authors = ["Issac Asimov", "Ray Bradburry", "Robert Heinlein", "Aurthur C. Clarke", "Frank Herbert", "Orson Scott Card",
                "Douglas Adams", "H. G. Wells", "Leigh Brackett"]

# 4. Dataframes 

In [60]:
import numpy as np
import pandas as pd
import random

In [61]:
random.random()

0.5095760797651685

**Example 4.1.** Create a list of 10000 random numbers.

In [None]:
data1 = [random.random() for i in range(10000)]

**Example 4.2.** Create a dataframe using the `data1` list.

In [66]:
df1 = pd.DataFrame(data1)
df1.head()

Unnamed: 0,0
0,0.417302
1,0.787021
2,0.772555
3,0.871788
4,0.806577


**Example 4.3.** Create a dataframe from a nested list.

In [71]:
data2 = [[i, random.randint(1,26)] for i in 'ABCBEFGHIJKLMNOPQRSTUVWXYZ']
df2 = pd.DataFrame(data2)
df2

Unnamed: 0,0,1
0,A,13
1,B,21
2,C,14
3,B,9
4,E,11
5,F,6
6,G,14
7,H,1
8,I,9
9,J,16


**Example 4.4.** Create a dataframe from a dictionary.

In [74]:
data3 = {'Model':['T57', 'T61', 'T64', 'T65'],
         'Price':[1.42, 1.48, 1.73, 1.95],
         'Size': ['57 in', '61 in', '64 in', '65 in']}
df3 = pd.DataFrame(data3)
df3

Unnamed: 0,Model,Price,Size
0,T57,1.42,57 in
1,T61,1.48,61 in
2,T64,1.73,64 in
3,T65,1.95,65 in


**Example 4.5.** Use the `Model` as the index.

In [76]:
df4 = pd.DataFrame({'Price':data3['Price'], 'Size':data3['Size']},
                   index=data3['Model'])
df4

Unnamed: 0,Price,Size
T57,1.42,57 in
T61,1.48,61 in
T64,1.73,64 in
T65,1.95,65 in


**Example 4.6.** Slicing a dataframe by index.

In [80]:
df4.loc['T57':]

Unnamed: 0,Price,Size
T57,1.42,57 in
T61,1.48,61 in
T64,1.73,64 in
T65,1.95,65 in


**Example 4.7.** Create a dataframe from a list of dictionaries.

In [84]:
data5 = [{'Ht': 63, 'Len':45, 'Wt':2.6},
         {'Ht': 29, 'Wt':1.7},
         {'Ht': 37, 'Len':71, 'Wt':4.2}]
data5

[{'Ht': 63, 'Len': 45, 'Wt': 2.6},
 {'Ht': 29, 'Wt': 1.7},
 {'Ht': 37, 'Len': 71, 'Wt': 4.2}]

In [85]:
df5 = pd.DataFrame(data5)
df5

Unnamed: 0,Ht,Len,Wt
0,63,45.0,2.6
1,29,,1.7
2,37,71.0,4.2


**Example 4.8.** Create a csv file from a dataframe.

In [89]:
df5.to_csv('outfile.csv', index=False)

**Example 4.9.** Read a csv file and store it in a dataframe.

In [90]:
df6 = pd.read_csv('outfile.csv')
df6

Unnamed: 0,Ht,Len,Wt
0,63,45.0,2.6
1,29,,1.7
2,37,71.0,4.2


# 5. Slicing Dataframes

In [99]:
df7 = pd.read_csv('https://raw.githubusercontent.com/mahmoudharding/ma4112/main/ldm/iris.csv')
df7.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


**Example 5.1.** Using the `loc` method.

In [98]:
# view the dimensions of the dataframe
df7.shape

(150, 5)

In [100]:
# slice by row index
df7.loc[120:125:2]

In [105]:
# slice by specific row indices
df7.loc[[1,3,5]]

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
1,4.9,3.0,1.4,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
5,5.4,3.9,1.7,0.4,Setosa


In [110]:
# slice by column name
df7[['sepal.length', 'sepal.width']].head()

Unnamed: 0,sepal.length,sepal.width
0,5.1,3.5
1,4.9,3.0
2,4.7,3.2
3,4.6,3.1
4,5.0,3.6


In [111]:
# slice by index and column
df7.loc[35:40, ['sepal.length', 'sepal.width']]

Unnamed: 0,sepal.length,sepal.width
35,5.0,3.2
36,5.5,3.5
37,4.9,3.6
38,4.4,3.0
39,5.1,3.4
40,5.0,3.5


In [118]:
# slice using column name 
df7.loc[df7.variety == "Setosa"]

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
5,5.4,3.9,1.7,0.4,Setosa
6,4.6,3.4,1.4,0.3,Setosa
7,5.0,3.4,1.5,0.2,Setosa
8,4.4,2.9,1.4,0.2,Setosa
9,4.9,3.1,1.5,0.1,Setosa


In [117]:
# slice using conditional statements
df7.loc[df7['sepal.length']>5]

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
5,5.4,3.9,1.7,0.4,Setosa
10,5.4,3.7,1.5,0.2,Setosa
14,5.8,4.0,1.2,0.2,Setosa
15,5.7,4.4,1.5,0.4,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


**Example 5.1.** Using the `iloc` method.

In [120]:
df7.iloc[:5]

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [122]:
# change the index values
df7.index = [i for i in range(0, 750, 5)]
df7.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
5,4.9,3.0,1.4,0.2,Setosa
10,4.7,3.2,1.3,0.2,Setosa
15,4.6,3.1,1.5,0.2,Setosa
20,5.0,3.6,1.4,0.2,Setosa


In [123]:
df7.iloc[:5]

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
5,4.9,3.0,1.4,0.2,Setosa
10,4.7,3.2,1.3,0.2,Setosa
15,4.6,3.1,1.5,0.2,Setosa
20,5.0,3.6,1.4,0.2,Setosa


In [124]:
# slice using iloc
df7.iloc[:, 1].head()

0     3.5
5     3.0
10    3.2
15    3.1
20    3.6
Name: sepal.width, dtype: float64

In [125]:
df7.iloc[1:3, 2]

5     1.4
10    1.3
Name: petal.length, dtype: float64

**Example 5.3.** Slice using the `query` function.

In [130]:
df7.query("sepal.length/2 > sepal.width")

UndefinedVariableError: name 'sepal' is not defined

In [133]:
df7.rename(columns = {'sepal.length':'sepallength', 'sepal.width':'sepalwidth' }, inplace = True) 
df7

Unnamed: 0,sepallength,sepalwidth,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
5,4.9,3.0,1.4,0.2,Setosa
10,4.7,3.2,1.3,0.2,Setosa
15,4.6,3.1,1.5,0.2,Setosa
20,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
725,6.7,3.0,5.2,2.3,Virginica
730,6.3,2.5,5.0,1.9,Virginica
735,6.5,3.0,5.2,2.0,Virginica
740,6.2,3.4,5.4,2.3,Virginica


In [136]:
df7.query('sepallength/2 > sepalwidth')

Unnamed: 0,sepallength,sepalwidth,petal.length,petal.width,variety
250,7.0,3.2,4.7,1.4,Versicolor
260,6.9,3.1,4.9,1.5,Versicolor
265,5.5,2.3,4.0,1.3,Versicolor
270,6.5,2.8,4.6,1.5,Versicolor
275,5.7,2.8,4.5,1.3,Versicolor
...,...,...,...,...,...
715,6.8,3.2,5.9,2.3,Virginica
720,6.7,3.3,5.7,2.5,Virginica
725,6.7,3.0,5.2,2.3,Virginica
730,6.3,2.5,5.0,1.9,Virginica


In [137]:
df7.query('sepalwidth in [3.2, 5.7]')

Unnamed: 0,sepallength,sepalwidth,petal.length,petal.width,variety
10,4.7,3.2,1.3,0.2,Setosa
145,4.7,3.2,1.6,0.2,Setosa
175,5.0,3.2,1.2,0.2,Setosa
210,4.4,3.2,1.3,0.2,Setosa
235,4.6,3.2,1.4,0.2,Setosa
250,7.0,3.2,4.7,1.4,Versicolor
255,6.4,3.2,4.5,1.5,Versicolor
350,5.9,3.2,4.8,1.8,Versicolor
550,6.5,3.2,5.1,2.0,Virginica
575,6.4,3.2,5.3,2.3,Virginica
