# Broadcasting

In [321]:
# Using broadcast to cater matrices with different dimensions
# one row and one column is ideal for broadcasting, may give errors in case of bigger dimension

In [322]:
import numpy as np

In [323]:
x = np.array([[1,2,3]])
print(x)
x.shape

[[1 2 3]]


(1, 3)

In [324]:
y = np.array([[4],[5]])
print(y)
y.shape

[[4]
 [5]]


(2, 1)

In [325]:
x,y = np.broadcast_arrays(x,y)
print(x)
print(x.shape)
print(y)
print(y.shape)

[[1 2 3]
 [1 2 3]]
(2, 3)
[[4 4 4]
 [5 5 5]]
(2, 3)


In [326]:
x+y

array([[5, 6, 7],
       [6, 7, 8]])

In [327]:
x*y

array([[ 4,  8, 12],
       [ 5, 10, 15]])

# Pandas

In [328]:
import pandas as pd

In [329]:
# Pandas Data Structures
# Series -- a sequence of number 

In [330]:
s1 = pd.Series([1,2,3,5,8,13])

In [331]:
s1 # provides default index by series object
   # here index acts as key (like dictionary key/values)
   # position of newly inserted index does not matter

0     1
1     2
2     3
3     5
4     8
5    13
dtype: int64

In [332]:
s1[3]

5

In [333]:
s1[[3,4,5]] # similar to fancy indexing, accessing multiple values
            # list inside a list

3     5
4     8
5    13
dtype: int64

In [334]:
s1[2] = 4 # Modifying a value
s1

0     1
1     2
2     4
3     5
4     8
5    13
dtype: int64

In [335]:
# Modifying multiple values as a bunch

s1[[0,1]] = [66,77]
s1

0    66
1    77
2     4
3     5
4     8
5    13
dtype: int64

In [336]:
del s1[2] # deleting the index
s1

0    66
1    77
3     5
4     8
5    13
dtype: int64

# ERROR

In [337]:
# Multiple delete
#del s1[[0,1]]
#s1

In [338]:
s1[2] = 3 # inserting a new index
s1

0    66
1    77
3     5
4     8
5    13
2     3
dtype: int64

In [341]:
# multiple insertion

s1[[34,45,12]] = [33,44,55]
s1

ValueError: [34 45 12] not contained in the index

In [None]:
s2 = pd.Series([10,20,30,40], index=["oranges","apples","bananas","pears"])

In [None]:
s2

In [None]:
s2.index

In [342]:
s2.values

array([10, 20, 30, 40], dtype=int64)

In [343]:
ourIndex = ['oranges', 'apples', 'bananas', 'pears']
s2 = pd.Series([10,20,30,40], index = ourIndex)
s2

oranges    10
apples     20
bananas    30
pears      40
dtype: int64

In [344]:
s2>30 # compares 30 with s2's every value

oranges    False
apples     False
bananas    False
pears       True
dtype: bool

In [345]:
s2<30

oranges     True
apples      True
bananas    False
pears      False
dtype: bool

In [346]:
s2[s2<30] # fetches the content and displays the values

oranges    10
apples     20
dtype: int64

In [347]:
"peaches" in s2

False

In [348]:
"pears" in s2

True

In [349]:
"40" in s2 # outputs False but it is incorrect, doesnt work for values only keys
s2

oranges    10
apples     20
bananas    30
pears      40
dtype: int64

# Creating a series using a dictionary

In [350]:
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000} # it is a dictionary
print(sdata)

{'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}


In [351]:
data = pd.Series(sdata) # generating a series using a dic
data

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [352]:
states = ['California', 'Ohio', 'Texas','North Carolina']
data1 = pd.Series(sdata, index = states) # changing indexes now, matches new index values with older indexes

In [353]:
data1  # NaN -- Not a Number means missing number

California            NaN
Ohio              35000.0
Texas             71000.0
North Carolina        NaN
dtype: float64

In [354]:
data1['California'] = 43000
data1

California        43000.0
Ohio              35000.0
Texas             71000.0
North Carolina        NaN
dtype: float64

In [355]:
data1.isnull() # tells which value is missing

California        False
Ohio              False
Texas             False
North Carolina     True
dtype: bool

In [356]:
data1.notnull() # tells what values are present

California         True
Ohio               True
Texas              True
North Carolina    False
dtype: bool

In [357]:
data1["North Carolina"] = 54000
data1

California        43000.0
Ohio              35000.0
Texas             71000.0
North Carolina    54000.0
dtype: float64

In [358]:
data1.isnull()

California        False
Ohio              False
Texas             False
North Carolina    False
dtype: bool

# Data Frames

In [359]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],        
        'year': [2000, 2001, 2002, 2001, 2002, 2003],        
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]} 

frame = pd.DataFrame(data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [360]:
table = { 'Name': ['Mahnoor','','','', 'Hafsa','','','', 'Sukaina','','','', 'Iqbal','','',''],
          'Exam': ['AI For Everyone', 'Python 1', 'Python 2','Git Test', 'AI For Everyone', 'Python 1', 'Python 2','Git Test', 'AI For Everyone', 'Python 1', 'Python 2','Git Test', 'AI For Everyone', 'Python 1', 'Python 2','Git Test' ],
          'Marks': [90,95,97,98,78,89,90,78,67,89,98,78,67,87,65,98]}
frame1 = pd.DataFrame(table)
frame1

Unnamed: 0,Name,Exam,Marks
0,Mahnoor,AI For Everyone,90
1,,Python 1,95
2,,Python 2,97
3,,Git Test,98
4,Hafsa,AI For Everyone,78
5,,Python 1,89
6,,Python 2,90
7,,Git Test,78
8,Sukaina,AI For Everyone,67
9,,Python 1,89


In [361]:
frame1['Marks']

0     90
1     95
2     97
3     98
4     78
5     89
6     90
7     78
8     67
9     89
10    98
11    78
12    67
13    87
14    65
15    98
Name: Marks, dtype: int64

In [362]:
frame1[['Exam','Marks']] # Multiple Column Selection

Unnamed: 0,Exam,Marks
0,AI For Everyone,90
1,Python 1,95
2,Python 2,97
3,Git Test,98
4,AI For Everyone,78
5,Python 1,89
6,Python 2,90
7,Git Test,78
8,AI For Everyone,67
9,Python 1,89


In [363]:
del frame1["Exam"]

In [364]:
frame1

Unnamed: 0,Name,Marks
0,Mahnoor,90
1,,95
2,,97
3,,98
4,Hafsa,78
5,,89
6,,90
7,,78
8,Sukaina,67
9,,89


In [365]:
frame1["Subject"] = "OOP"
frame1

Unnamed: 0,Name,Marks,Subject
0,Mahnoor,90,OOP
1,,95,OOP
2,,97,OOP
3,,98,OOP
4,Hafsa,78,OOP
5,,89,OOP
6,,90,OOP
7,,78,OOP
8,Sukaina,67,OOP
9,,89,OOP


In [366]:
frame1["Subject"] = ["OOP", "CP","DBMS","DSA", "OOP", "CP","DBMS","DSA", "OOP", "CP","DBMS","DSA", "OOP", "CP","DBMS","DSA"]
frame1

Unnamed: 0,Name,Marks,Subject
0,Mahnoor,90,OOP
1,,95,CP
2,,97,DBMS
3,,98,DSA
4,Hafsa,78,OOP
5,,89,CP
6,,90,DBMS
7,,78,DSA
8,Sukaina,67,OOP
9,,89,CP


In [367]:
frame1["Subject"] = range(16)
frame1

Unnamed: 0,Name,Marks,Subject
0,Mahnoor,90,0
1,,95,1
2,,97,2
3,,98,3
4,Hafsa,78,4
5,,89,5
6,,90,6
7,,78,7
8,Sukaina,67,8
9,,89,9


In [368]:
dic = {
      "AI For Everyone" : [100,87,90,76,32,89,86,85],
      "Python 1" : [100,15,97,96,35,87,85,45],
      "Python 2" : [32,47,98,26,93,78,67,87]
     }

df = pd.DataFrame(dic, index=["Mahnoor", "Laaebba", "Rida", "Neha","Madiha","Sifat","Meraj","Hamza"])
df

Unnamed: 0,AI For Everyone,Python 1,Python 2
Mahnoor,100,100,32
Laaebba,87,15,47
Rida,90,97,98
Neha,76,96,26
Madiha,32,35,93
Sifat,89,87,78
Meraj,86,85,67
Hamza,85,45,87


In [369]:
df["Total_Marks"] = range(8)
df

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks
Mahnoor,100,100,32,0
Laaebba,87,15,47,1
Rida,90,97,98,2
Neha,76,96,26,3
Madiha,32,35,93,4
Sifat,89,87,78,5
Meraj,86,85,67,6
Hamza,85,45,87,7


In [370]:
df["Total_Marks"] = df["AI For Everyone"] + df["Python 1"] + df["Python 2"]
df

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks
Mahnoor,100,100,32,232
Laaebba,87,15,47,149
Rida,90,97,98,285
Neha,76,96,26,198
Madiha,32,35,93,160
Sifat,89,87,78,254
Meraj,86,85,67,238
Hamza,85,45,87,217


In [371]:
#stat = df[df["Total Marks"] < 250]
#df["Results"] = df[stat]
#df

In [372]:
df.loc[df.Total_Marks <= 250, 'Status'] = 'Failed' 
df.loc[df.Total_Marks > 250, 'Status'] = 'Passed'

print(df)

         AI For Everyone  Python 1  Python 2  Total_Marks  Status
Mahnoor              100       100        32          232  Failed
Laaebba               87        15        47          149  Failed
Rida                  90        97        98          285  Passed
Neha                  76        96        26          198  Failed
Madiha                32        35        93          160  Failed
Sifat                 89        87        78          254  Passed
Meraj                 86        85        67          238  Failed
Hamza                 85        45        87          217  Failed


In [373]:
#Sir's method

df['Status'] = ["Pass" if marks >= 250 else "Fails" for marks in df.Total_Marks]
df

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks,Status
Mahnoor,100,100,32,232,Fails
Laaebba,87,15,47,149,Fails
Rida,90,97,98,285,Pass
Neha,76,96,26,198,Fails
Madiha,32,35,93,160,Fails
Sifat,89,87,78,254,Pass
Meraj,86,85,67,238,Fails
Hamza,85,45,87,217,Fails


In [374]:
numbers = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
evenArr = []
oddArr = []
for num in numbers:
    if num%2 == 0:
        evenArr.append(num)
    else:
        oddArr.append(num)
        
print("Evens", evenArr)
print("Odds", oddArr)

Evens [2, 4, 6, 8, 10, 12, 14]
Odds [1, 3, 5, 7, 9, 11, 13, 15]


In [375]:
# list comprehension
[num for num in numbers if num%2==0]

[2, 4, 6, 8, 10, 12, 14]

In [376]:
df["Percentage"] = df["Total_Marks"]/300 * 100
df

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks,Status,Percentage
Mahnoor,100,100,32,232,Fails,77.333333
Laaebba,87,15,47,149,Fails,49.666667
Rida,90,97,98,285,Pass,95.0
Neha,76,96,26,198,Fails,66.0
Madiha,32,35,93,160,Fails,53.333333
Sifat,89,87,78,254,Pass,84.666667
Meraj,86,85,67,238,Fails,79.333333
Hamza,85,45,87,217,Fails,72.333333


In [377]:
#df.loc[df.AI For Everyone  <= 250, 'Status'] = 'Failed' 
#df.loc[df.Total_Marks > 250, 'Status'] = 'Passed'

#print(df)

In [378]:
df['Status2'] = ["Pass" if a >= 33 and b >= 33 and c >= 33 else "Fails" for a,b,c in zip(df['AI For Everyone'], df['Python 1'], df['Python 2'])]
df

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks,Status,Percentage,Status2
Mahnoor,100,100,32,232,Fails,77.333333,Fails
Laaebba,87,15,47,149,Fails,49.666667,Fails
Rida,90,97,98,285,Pass,95.0,Pass
Neha,76,96,26,198,Fails,66.0,Fails
Madiha,32,35,93,160,Fails,53.333333,Fails
Sifat,89,87,78,254,Pass,84.666667,Pass
Meraj,86,85,67,238,Fails,79.333333,Pass
Hamza,85,45,87,217,Fails,72.333333,Pass


In [379]:
a = [3,4,5,6]
b = [6,7,4,7]

c = zip(a,b)
for i in c:
    print(i)

(3, 6)
(4, 7)
(5, 4)
(6, 7)


In [380]:
df.head() # returns 5 first rows

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks,Status,Percentage,Status2
Mahnoor,100,100,32,232,Fails,77.333333,Fails
Laaebba,87,15,47,149,Fails,49.666667,Fails
Rida,90,97,98,285,Pass,95.0,Pass
Neha,76,96,26,198,Fails,66.0,Fails
Madiha,32,35,93,160,Fails,53.333333,Fails


In [381]:
df.head(4)

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks,Status,Percentage,Status2
Mahnoor,100,100,32,232,Fails,77.333333,Fails
Laaebba,87,15,47,149,Fails,49.666667,Fails
Rida,90,97,98,285,Pass,95.0,Pass
Neha,76,96,26,198,Fails,66.0,Fails


In [382]:
df.tail() # returns last 5 rows by default

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks,Status,Percentage,Status2
Neha,76,96,26,198,Fails,66.0,Fails
Madiha,32,35,93,160,Fails,53.333333,Fails
Sifat,89,87,78,254,Pass,84.666667,Pass
Meraj,86,85,67,238,Fails,79.333333,Pass
Hamza,85,45,87,217,Fails,72.333333,Pass


In [383]:
df.tail(3)

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks,Status,Percentage,Status2
Sifat,89,87,78,254,Pass,84.666667,Pass
Meraj,86,85,67,238,Fails,79.333333,Pass
Hamza,85,45,87,217,Fails,72.333333,Pass


In [385]:
df.loc['Hamza']

AI For Everyone         85
Python 1                45
Python 2                87
Total_Marks            217
Status               Fails
Percentage         72.3333
Status2               Pass
Name: Hamza, dtype: object

In [386]:
df.loc['Mahnoor':'Madiha']

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks,Status,Percentage,Status2
Mahnoor,100,100,32,232,Fails,77.333333,Fails
Laaebba,87,15,47,149,Fails,49.666667,Fails
Rida,90,97,98,285,Pass,95.0,Pass
Neha,76,96,26,198,Fails,66.0,Fails
Madiha,32,35,93,160,Fails,53.333333,Fails


In [387]:
df.iloc[0]

AI For Everyone        100
Python 1               100
Python 2                32
Total_Marks            232
Status               Fails
Percentage         77.3333
Status2              Fails
Name: Mahnoor, dtype: object

In [389]:
df.iloc[-1]

AI For Everyone         85
Python 1                45
Python 2                87
Total_Marks            217
Status               Fails
Percentage         72.3333
Status2               Pass
Name: Hamza, dtype: object

In [391]:
df.loc['Neha']['Python 2']

26

In [395]:
df.loc['Zahra'] = [4,5,6,7,5,6,7]

In [396]:
df

Unnamed: 0,AI For Everyone,Python 1,Python 2,Total_Marks,Status,Percentage,Status2
Mahnoor,100,100,32,232,Fails,77.333333,Fails
Laaebba,87,15,47,149,Fails,49.666667,Fails
Rida,90,97,98,285,Pass,95.0,Pass
Neha,76,96,26,198,Fails,66.0,Fails
Madiha,32,35,93,160,Fails,53.333333,Fails
Sifat,89,87,78,254,Pass,84.666667,Pass
Meraj,86,85,67,238,Fails,79.333333,Pass
Hamza,85,45,87,217,Fails,72.333333,Pass
Zahra,4,5,6,7,5,6.0,7
