# List

* https://docs.python.org/3/tutorial/datastructures.html
* https://www.programiz.com/python-programming/list
* https://developers.google.com/edu/python/lists

In [1]:
fruits = ['orange', 'apple', 'pear', 'banana', 'kiwi', 'apple', 'banana']
fruits

['orange', 'apple', 'pear', 'banana', 'kiwi', 'apple', 'banana']

In [2]:
#print element of index 2
print(fruits[2])

pear


In [3]:
#print last element
print(fruits[-1])

banana


In [4]:
#print every other element [start:stop:step]
print(fruits[::2])

['orange', 'pear', 'kiwi', 'banana']


In [5]:
#count number of occurrences for 'apple'
fruits.count('apple')

2

In [6]:
#count number of occurences for 'xyz
fruits.count('xyz')

0

In [7]:
#returns the first index of 'banana'
fruits.index('banana')

3

In [8]:
#returns the first index of 'banana' after the index of 4
fruits.index('banana',4)

6

In [9]:
#returns index of 'xyz' if it exists in list, otherwise, nothing happens
if 'xyz' in fruits:
  fruits.index('xyz')

In [10]:
#reverses fruits list
fruits.reverse()
fruits

['banana', 'apple', 'kiwi', 'banana', 'pear', 'apple', 'orange']

In [11]:
#removes 'banana' from list
fruits.remove('banana')
fruits

['apple', 'kiwi', 'banana', 'pear', 'apple', 'orange']

In [12]:
#adds 'grape' to end of list
fruits.append('grape')
fruits

['apple', 'kiwi', 'banana', 'pear', 'apple', 'orange', 'grape']

In [13]:
#sorts list by alpha order
fruits.sort()
fruits

['apple', 'apple', 'banana', 'grape', 'kiwi', 'orange', 'pear']

## List as Stack

In [14]:
#list being used as a stack
stack = [3,4,5]
stack

[3, 4, 5]

In [15]:
stack.append(6)
stack.append(7)
stack

[3, 4, 5, 6, 7]

In [16]:
#remove and return last element that was added into stack
stack.pop()

7

In [17]:
stack

[3, 4, 5, 6]

In [18]:
#print stack elements in the order that they were appended
for x in stack:
  print(x)

3
4
5
6


## Matrix

In [19]:
#list of lists, nested list comprehension
matrix = [
          [1,2,3,4],
          [5,6,7,8],
          [9,10,11,12] ]
print(matrix)

[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]


In [20]:
#length of list at index 0
len(matrix[0])

4

In [21]:
n = []
for i in range(len(matrix)): #traverses through the length of the entire list (matrix)
  for j in range(len(matrix[i])): #traverses through the length of each nested list
    n.append(matrix[i][j]) #append each element within each nested list within the entire matrix into list n  
#matrix[i][j] can be read different ways:
#matrix[column][row]
#matrix[nested list][element of nested list]
print(n)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]


In [22]:
#doing the same thing as the cell above, except in a much more cleaner and tidy way
n = [] #empty list (n)
for sublist in matrix: #traverse through each sublists within matrix list
  for val in sublist: #traverse through each value within each sublist
    n.append(val) #append each value from within each sublist from within the matrix list into the new empty list (n)
print(n)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]


In [23]:
#one-liner for code in cell 21, having the code in brackets, returns code out into a new list
[ matrix[i][j] for i in range(len(matrix)) for j in range(len(matrix[i])) ]
#this code is saying
#return a list containing values matrix[i][j]
#for [i] in the range of the length of the matrix, meaning the nested lists within the matrix
#and for [j] in the range of the length of the values within the nested lists that are in the matrix

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

In [24]:
#one-liner for code in cell 22, having the code in brackets, returns code out into a new list
[ val for sublist in matrix for val in sublist ]
#this code is saying
#return a list contationing values val
#for each sublist within the matrix
#and for each value val within each of those sublists

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

# Dictionary

* https://realpython.com/python-dicts/
* https://www.programiz.com/python-programming/dictionary

In [25]:
MLB_team = {
    'Colorado' : 'Rockies',
    'Boston'   : 'Red Sox',
    'Minnesota': 'Twins',
    'Milwaukee': 'Brewers',
    'Seattle'  : 'Mariners'
    }
MLB_team

{'Colorado': 'Rockies',
 'Boston': 'Red Sox',
 'Minnesota': 'Twins',
 'Milwaukee': 'Brewers',
 'Seattle': 'Mariners'}

In [26]:
MLB_team['Boston']

'Red Sox'

In [27]:
#MLB_team['Texas'] will return an error
print(MLB_team.get('Texas')) #if it does not exist it will return None
#.get is used to handle errors

None


In [28]:
MLB_team['Texas'] = 'Rangers'
MLB_team

{'Colorado': 'Rockies',
 'Boston': 'Red Sox',
 'Minnesota': 'Twins',
 'Milwaukee': 'Brewers',
 'Seattle': 'Mariners',
 'Texas': 'Rangers'}

In [29]:
MLB_team['Seattle'] = 'Seahawks'
MLB_team

{'Colorado': 'Rockies',
 'Boston': 'Red Sox',
 'Minnesota': 'Twins',
 'Milwaukee': 'Brewers',
 'Seattle': 'Seahawks',
 'Texas': 'Rangers'}

In [30]:
MLB_team.pop('Seattle')
# print(MLB_team)

'Seahawks'

In [31]:
del MLB_team['Seattle']
MLB_team

KeyError: 'Seattle'

In [None]:
#acesses all the keys and returns them in a list
list(MLB_team.keys())

In [None]:
#acesses all the values and returns them in a list
list(MLB_team.values())

# Numpy array / matrix

* https://numpy.org/doc/stable/user/quickstart.html
* https://www.programiz.com/python-programming/matrix

In [2]:
import numpy as np

In [5]:
#creates an array full of zeros, np.(what you want in the array)(how many times you want it)
print(np.zeros(5))

[0. 0. 0. 0. 0.]


In [6]:
#creates an array full of zeros, np.(what you want in the array)(rows, collumns)
#as floats
print(np.zeros((3,4)))

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [7]:
#creates an array full of zeros, np.(what you want in the array)(rows, collumns), as integers
print(np.ones((3,4), dtype=np.int16))

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


In [8]:
#create an array of a list
print(np.array([20,30,40,50]))

[20 30 40 50]


In [9]:
#create an array to the range of less than 4
print(np.arange(4))

[0 1 2 3]


In [10]:
#creates an array to the range of less than 2, incrementing by 0.3
print(np.arange(0,2,0.3))

[0.  0.3 0.6 0.9 1.2 1.5 1.8]


In [11]:
#creates an array to the range of less than 12, reshaped by rows of 3 and collumns of 4
print(np.arange(12).reshape(3,4))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [12]:
#a is the array of the list of values
a = np.array([20, 30, 40, 50])
print(a)

[20 30 40 50]


In [13]:
#b is the array of values to the range of less than 4
b = np.arange(4)
print(b)

[0 1 2 3]


In [15]:
#return a difference of the arrays
#my initial answer is the code returns the array [20 29 38 47]
#i was correct, except there was a difference in the output since the code was not within a print function
a-b

[20 29 38 47]


array([20, 29, 38, 47])

In [16]:
#return an addition of the arrays
a+b

array([20, 31, 42, 53])

In [17]:
#returns the values within the array b each raised to the power of 2
b**2

array([0, 1, 4, 9])

In [18]:
#dot product is a type of multiplication across the arrays. each index will multiply the coresponding index in the other array, and it will all be added
#(20*0)+(30*1)+(40*2)+(50*3)=260
a.dot(b)

260

In [19]:
A = np.array([[1, 1],
              [0, 1]])
print(A)

[[1 1]
 [0 1]]


In [20]:
B = np.array([[2, 0],
              [3, 4]])
print(B)

[[2 0]
 [3 4]]


In [21]:
#the product of the two arrays, element wise
A*B

array([[2, 0],
       [0, 4]])

In [24]:
#matrix multiplication
#(0,0) index is the dot product of the first row of A and the first column of B
#(0,1) index is the dot product of the first row of A and the second column of B
#(1,0) index is the dot product of the second row of A and the first column of B
#(1,1) index is the dot product of the second row of A and the second column of B
A@B

array([[5, 4],
       [3, 4]])

In [25]:
#this is another way to the matrix multiplication
A.dot(B)

array([[5, 4],
       [3, 4]])

In [26]:
#a is the array of a range less than 10, where the values are being raised to the power of 3
a = np.arange(10)**3
print(a)

[  0   1   8  27  64 125 216 343 512 729]


In [27]:
#return indexes 2 through 4
a[2:5]

array([ 8, 27, 64], dtype=int32)

In [28]:
#return indexes up to 8 incrementing by 2 
a[:8:2]

array([  0,   8,  64, 216], dtype=int32)

In [29]:
#return last index
a[-1]

729

In [30]:
#return array in reverse order
a[::-1]

array([729, 512, 343, 216, 125,  64,  27,   8,   1,   0], dtype=int32)

In [32]:
C = np.array([[[  0,  1,  2],
               [ 10, 12, 13]],
              [[100, 101, 102],
               [110, 112, 113]]])
print(C)

[[[  0   1   2]
  [ 10  12  13]]

 [[100 101 102]
  [110 112 113]]]


In [33]:
#return the shape of C, (2 subarrays, 2 rows each, 3 columns), 3d array, a collection of 2d arrays
print(C.shape)

(2, 2, 3)


In [49]:
a = np.array([[ 0,  1,  2,  3],
              [ 4,  5,  6,  7],
              [ 8,  9, 10, 11]])
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [50]:
b = a
print(b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [51]:
#in row [1] convert the first 2 elements into 0, changes it for all previous instances of array of b
#b[1][:2]=0 also works
b[1,:2] = 0
print(b)

[[ 0  1  2  3]
 [ 0  0  6  7]
 [ 8  9 10 11]]


In [52]:
#because we used b=a, instead of copying a, we also turned a's elements into zeros, because it's a reference
a+b

array([[ 0,  2,  4,  6],
       [ 0,  0, 12, 14],
       [16, 18, 20, 22]])

In [54]:
a

array([[ 0,  1,  2,  3],
       [ 0,  0,  6,  7],
       [ 8,  9, 10, 11]])

In [57]:
a = np.array([[ 0,  1,  2,  3],
              [ 4,  5,  6,  7],
              [ 8,  9, 10, 11]])
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [58]:
#make a separate copy, that can be changed without affecting the original variable, a completely new object
c = a.copy()
print(c)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [59]:
c[1,:2] = 0
print(c)

[[ 0  1  2  3]
 [ 0  0  6  7]
 [ 8  9 10 11]]


In [60]:
#a was not affected
a+c

array([[ 0,  2,  4,  6],
       [ 4,  5, 12, 14],
       [16, 18, 20, 22]])

# Pandas dataframe

* https://pandas.pydata.org/docs/user_guide/10min.html
* https://pandas.pydata.org/pandas-docs/dev/getting_started/tutorials.html
* https://www.learndatasci.com/tutorials/python-pandas-tutorial-complete-introduction-for-beginners/

In [3]:
import pandas as pd

In [4]:
a = [1083, 1714, 2124, 3424, 3343, 3793]
print(a)

[1083, 1714, 2124, 3424, 3343, 3793]


In [5]:
s = pd.Series(a)
print(s)

0    1083
1    1714
2    2124
3    3424
4    3343
5    3793
dtype: int64


In [4]:
#prefix the string with r (to produce a raw string), so that \r does not produce a Unicode error

df = pd.read_csv(r'C:\Users\rmani\OneDrive\Desktop\Artificial Intelligence\codes\courselist-spring22.csv')
print(df)

     Status Subject  Course Section    CRN                           Title  \
0      OPEN      CS    1063       1  20464     Intro to Comp Programming I   
1      OPEN      CS    1083       1  31537  Prog I for Computer Scientists   
2      OPEN      CS    1083       2  31538  Prog I for Computer Scientists   
3    CLOSED      CS    1083       3  31810  Prog I for Computer Scientists   
4      OPEN      CS    1083       4  35683  Prog I for Computer Scientists   
..      ...     ...     ...     ...    ...                             ...   
349    OPEN      CS    7316      22  37320           Doctoral Dissertation   
350    OPEN      CS    7316      23  37321           Doctoral Dissertation   
351    OPEN      CS    7316      24  37322           Doctoral Dissertation   
352    OPEN      CS    7316      25  37323           Doctoral Dissertation   
353    OPEN      CS    7316      26  37324           Doctoral Dissertation   

    Cred Meeting Days          Times              Meeting Type 

In [7]:
#prints only a snapshot, first 4-5 rows
print(df.head())

   Status Subject  Course Section    CRN                           Title Cred  \
0    OPEN      CS    1063       1  20464     Intro to Comp Programming I    3   
1    OPEN      CS    1083       1  31537  Prog I for Computer Scientists    3   
2    OPEN      CS    1083       2  31538  Prog I for Computer Scientists    3   
3  CLOSED      CS    1083       3  31810  Prog I for Computer Scientists    3   
4    OPEN      CS    1083       4  35683  Prog I for Computer Scientists    3   

  Meeting Days          Times              Meeting Type  ...   Location  \
0           MW    4:00-5:15pm  Online only, at set time  ...              
1           TR    4:00-5:15pm     Traditional in-person  ...  NPB 1.202   
2           TR  10:00-11:15am     Traditional in-person  ...  NPB 1.202   
3          MWF  10:00-10:50am  Online only, at set time  ...              
4           TR    1:00-2:15pm     Traditional in-person  ...  NPB 1.238   

                          Instructor         Date Weeks  Seats

In [8]:
#prints all the columns
print(df.columns)

Index(['Status', 'Subject', 'Course', 'Section', 'CRN', 'Title', 'Cred',
       'Meeting Days', 'Times', 'Meeting Type', 'Campus', 'Location',
       'Instructor', 'Date', 'Weeks', 'Seats', 'Enrolled', 'Available',
       'Wait List', 'Final Exam', 'Fees'],
      dtype='object')


In [9]:
#columns we don't care about gathered within a list
cols_to_drop = ['Meeting Days', 'Times', 'Meeting Type', 'Campus', 'Location', 'Date', 'Weeks', 'Final Exam', 'Wait List']
#axis=1 is for columns
#axiz=0 is for rows/index
#drop column for column within list of columns we dont care about if such column exitsts withinn the original csv file
df = df.drop([x for x in cols_to_drop if x in df.columns], axis=1)
print(df.columns)
print(df.head())

Index(['Status', 'Subject', 'Course', 'Section', 'CRN', 'Title', 'Cred',
       'Instructor', 'Seats', 'Enrolled', 'Available', 'Fees'],
      dtype='object')
   Status Subject  Course Section    CRN                           Title Cred  \
0    OPEN      CS    1063       1  20464     Intro to Comp Programming I    3   
1    OPEN      CS    1083       1  31537  Prog I for Computer Scientists    3   
2    OPEN      CS    1083       2  31538  Prog I for Computer Scientists    3   
3  CLOSED      CS    1083       3  31810  Prog I for Computer Scientists    3   
4    OPEN      CS    1083       4  35683  Prog I for Computer Scientists    3   

                          Instructor  Seats  Enrolled  Available   Fees  
0               Rutherford, Linda B.     75        70          5  $111   
1  Gomez Morales, Mauricio Alejandro     60        32         28  $111   
2  Gomez Morales, Mauricio Alejandro     60        53          7  $111   
3                     Rathore, Heena     65        65     

In [10]:
#datatype for each column
print(df.dtypes)

Status        object
Subject       object
Course         int64
Section       object
CRN            int64
Title         object
Cred          object
Instructor    object
Seats          int64
Enrolled       int64
Available      int64
Fees          object
dtype: object


In [11]:
#Cast a pandas object to a specified data type
df = df.astype({'CRN':str})
print(df.dtypes)

Status        object
Subject       object
Course         int64
Section       object
CRN           object
Title         object
Cred          object
Instructor    object
Seats          int64
Enrolled       int64
Available      int64
Fees          object
dtype: object


In [12]:
#convert cred list to numeric, errors='coerce' means it's being forced, so that we do not get any errors
df['Cred'] = pd.to_numeric(df['Cred'], errors='coerce')
print(df.dtypes)
#it has been turned into a float

Status         object
Subject        object
Course          int64
Section        object
CRN            object
Title          object
Cred          float64
Instructor     object
Seats           int64
Enrolled        int64
Available       int64
Fees           object
dtype: object


In [13]:
#remove $ from fees
#get the string for the object then replace $ with nothing
df['Fees'] = df['Fees'].str.replace('$','')
print(df[df['Course']==1714])

    Status Subject  Course Section    CRN                    Title  Cred  \
13    OPEN      CS    1714     0A1  36483  Computer Programming II   4.0   
14    OPEN      CS    1714     0AA  36484  Computer Programming II   NaN   
15    OPEN      CS    1714     0AB  36485  Computer Programming II   NaN   
16    OPEN      CS    1714     0B2  36622  Computer Programming II   4.0   
17    OPEN      CS    1714     0BA  36623  Computer Programming II   NaN   
18    OPEN      CS    1714     0BB  36626  Computer Programming II   NaN   
19    OPEN      CS    1714     0C3  36634  Computer Programming II   4.0   
20    OPEN      CS    1714     0CA  36635  Computer Programming II   NaN   
21  CLOSED      CS    1714     0CB  36638  Computer Programming II   NaN   
22    OPEN      CS    1714     0D4  36643  Computer Programming II   4.0   
23    OPEN      CS    1714     0DA  36646  Computer Programming II   NaN   
24    OPEN      CS    1714     0DB  36650  Computer Programming II   NaN   

          I

  df['Fees'] = df['Fees'].str.replace('$','')


In [14]:
#wrong way to find CS1714
#returns a boolean for rows that match/do not match
df['Course'] == 1714

0      False
1      False
2      False
3      False
4      False
       ...  
349    False
350    False
351    False
352    False
353    False
Name: Course, Length: 354, dtype: bool

In [15]:
#print df, when course in df file == CS1714
print(df[df['Course']==1714])

    Status Subject  Course Section    CRN                    Title  Cred  \
13    OPEN      CS    1714     0A1  36483  Computer Programming II   4.0   
14    OPEN      CS    1714     0AA  36484  Computer Programming II   NaN   
15    OPEN      CS    1714     0AB  36485  Computer Programming II   NaN   
16    OPEN      CS    1714     0B2  36622  Computer Programming II   4.0   
17    OPEN      CS    1714     0BA  36623  Computer Programming II   NaN   
18    OPEN      CS    1714     0BB  36626  Computer Programming II   NaN   
19    OPEN      CS    1714     0C3  36634  Computer Programming II   4.0   
20    OPEN      CS    1714     0CA  36635  Computer Programming II   NaN   
21  CLOSED      CS    1714     0CB  36638  Computer Programming II   NaN   
22    OPEN      CS    1714     0D4  36643  Computer Programming II   4.0   
23    OPEN      CS    1714     0DA  36646  Computer Programming II   NaN   
24    OPEN      CS    1714     0DB  36650  Computer Programming II   NaN   

          I

In [16]:
#replace NaN values with 0, NaN means not a number
df = df.fillna(0)
print(df[df['Course']==1714])

    Status Subject  Course Section    CRN                    Title  Cred  \
13    OPEN      CS    1714     0A1  36483  Computer Programming II   4.0   
14    OPEN      CS    1714     0AA  36484  Computer Programming II   0.0   
15    OPEN      CS    1714     0AB  36485  Computer Programming II   0.0   
16    OPEN      CS    1714     0B2  36622  Computer Programming II   4.0   
17    OPEN      CS    1714     0BA  36623  Computer Programming II   0.0   
18    OPEN      CS    1714     0BB  36626  Computer Programming II   0.0   
19    OPEN      CS    1714     0C3  36634  Computer Programming II   4.0   
20    OPEN      CS    1714     0CA  36635  Computer Programming II   0.0   
21  CLOSED      CS    1714     0CB  36638  Computer Programming II   0.0   
22    OPEN      CS    1714     0D4  36643  Computer Programming II   4.0   
23    OPEN      CS    1714     0DA  36646  Computer Programming II   0.0   
24    OPEN      CS    1714     0DB  36650  Computer Programming II   0.0   

          I

In [17]:
print(df['Instructor'].unique())

['Rutherford, Linda B.' 'Gomez Morales, Mauricio Alejandro'
 'Rathore, Heena' 'Long, Byron L.' 'Roberson, Dawnlee J.' 'Staff'
 'Jointer, Stanley' 'Desai, Kevin' 'Banerjee, Jishnu'
 'Sherette, Jonathan L.' 'Heaps, John S.' 'Pritom, Mir Mehedi A.'
 'Yang, Zhongxiu' 'Arafat, Imtiaz Muhammad' 'Anderson, Benjamin R.'
 'Lu, Qi' 'Dutta, Anandi K.' 'Gibson, Matthew R.' 'Silvestro, Sam A.'
 "O'Hara, Steven A." 'Korkmaz, Turgay  .' 'Ruan, Jianhua'
 'Valadez, Juan M.' 'Wang, Wei' 'Ortiz, John A.' 'Murphy, Richard L.'
 'Arora, Ritu' 'Fernandez, Amanda S.' 'Harrison, Keith B.' 'Sandhu, Ravi'
 'Quarles, John P.' 'Zhu, Dakai' 'Ku, Bernard S.' 'Rutherford, James R.'
 'Slavin, Rocky L.' 'Najafirad, Peyman' 'Wang, Xiaoyin'
 'White, Gregory B.' 'Xie, Mimi' 'Lama, Palden' 'Jha, Sumit K.'
 'Boppana, Rajendra V.' 'Niu, Jianwei' 'Jadliwala, Murtuza'
 'Prasad, Sushil']


# Matplotlib

* https://matplotlib.org/stable/tutorials/index.html
* https://www.datacamp.com/tutorial/matplotlib-tutorial-python
* https://realpython.com/python-matplotlib-guide/
* https://www.geeksforgeeks.org/matplotlib-tutorial/

In [5]:
import matplotlib.pyplot as plt

In [6]:
df_plot=df.copy()