## Numpy

In [39]:
import numpy as np

##### Number 1 Converting a list to n-dimensional NumPy array

In [40]:
numpy_array = np.array([1,2,3])

##### Number 2 Use of np.newaxis and np.reshape

In [42]:
a = [1,2,3,4,5]
list(a)
a_numpy = np.array(a)
#a_numpy.shape

row_vector = a_numpy[:,np.newaxis]
col_vector = a_numpy[np.newaxis,:]

a = list(range(0,15))
b = np.array(a).reshape(3,5)
b

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

#### Number 3 Converting any data type to NumPy array

In [43]:
a = [(1,2), [3,4,(5)],(6,7,8)]
b = np.asarray(a)
b

array([(1, 2), list([3, 4, 5]), (6, 7, 8)], dtype=object)

#### Number 4 Get an n-dimensional array of zeros.

In [44]:
a = np.zeros((3,4), dtype=float)
a

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

#### Number 5 Get an n-dimensional array of ones.

In [46]:
a = np.ones((3,4), dtype=np.int32)
a

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int32)

#### Number 6 np.full and np.empty

In [47]:
#np.full
b = np.full((2,3), 1, dtype=np.float16)
#np.empty
a = np.empty((2,2), dtype=np.int16)

print(b)
print(a)

[[1. 1. 1.]
 [1. 1. 1.]]
[[1 0]
 [0 0]]


#### Number 7 Getting an array of evenly spaced values with np.arrange and np.linspace

In [48]:
a = np.linspace(1,2,num=5,endpoint=False,retstep=True)
a
b= np.arange(1,2,0.1)
b

array([1. , 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9])

#### Number 8 Finding the shape of the NumPy array

In [49]:
c = b.shape
c

(10,)

#### Number 9 Knowing the dimensions of the NumPy array

In [50]:
x = np.array([1,2,3])
y = x.ndim
y

1

#### Number 10 Finding the number of elements in the NumPy array

In [51]:
x = np.ones((3,2,4),dtype=np.int16)
y =x.size
y

24

#### Number 11 Get the memory space occupied by an n-dimensional array

In [52]:
y = x.nbytes
y

48

#### Number 12 Finding the data type of elements in the NumPy array

In [53]:
x = np.ones((2,3), dtype=np.int16)

y = x.dtype
y

dtype('int16')

#### Number 13 How to create a copy of NumPy array

In [54]:
y = np.array([[1,3],[5,6]])
x = np.copy(y)
x

array([[1, 3],
       [5, 6]])

#### Number 14 Get transpose of an n-d array

In [55]:
x = np.array([[1,2],[3,4]])
y = x.T
y

array([[1, 3],
       [2, 4]])

#### Number 15 Flatten an n-d array to get a one-dimensional array

In [58]:
x = np.array([[1,2,3],[4,5,9]])
y = x.reshape(-1)

z = np.array([[1,2,3],[4,5,6]])
w = z.ravel()
w

array([1, 2, 3, 4, 5, 6])

#### Number 16 Change axes of an n-d array or swap dimensions

In [59]:
x = np.ones((3,4,5))
x = np.moveaxis(x,[1,2],[0,-2])

x = np.array([[1,2],[3,4]])
y = np.swapaxes(x,0,1)
y

array([[1, 3],
       [2, 4]])

#### Number 17 Convert NumPy array to list

In [60]:
x = np.array([[3,4,5,9],[2,6,8,0]])
y = x.tolist()
y

[[3, 4, 5, 9], [2, 6, 8, 0]]

#### Number 18 Change the data type of elements in the NumPy array.

In [61]:
x = np.array([0,1,2.0,2.0,4.2])
y = x.astype(np.int16)

z = x.astype(np.bool)
z

array([False,  True,  True,  True,  True])

#### Number 19 Get indices of non-zero elements

In [62]:
x = np.array([0,1,2.0,2.0,4.2],dtype=np.float32)
y = x.nonzero()
y
x = np.array([[0,1],[3,5]])
y = x.nonzero()
y

(array([0, 1, 1]), array([1, 0, 1]))

#### Number 20  Sort NumPy array

In [63]:
x = np.array([[4,3],[3,2]])

y = np.sort(x, axis=1)
y

array([[3, 4],
       [2, 3]])

#### Number 21 Compare NumPy arrays to values

In [64]:
x = np.array([[0,1],[2,3]])
y = (x == 1).astype(np.int16).sum()
y

1

#### Number 22 Multiply two NumPy matrices

In [65]:
a = np.eye(2) #eye is an identity matrix, how fitting!

b = np.array([[1,2],[3,4]])

np.matmul(a,b)
np.multiply(a,b)

array([[1., 0.],
       [0., 4.]])

#### Number 23 Dot product of two arrays

In [66]:
a = np.array([[1,2,3],[4,8,16]])
b = np.array([5,6,11]).reshape(-1,1)
np.dot(a,b)
##I double checked my answer for the above and I got 50 and 244 both times so I am not sure if I got it wrong or the 
##example with the answer was right on the worksheet

a = np.array([[1,2,3,4]])
b = np.array([[4,5,6,7]]).reshape(-1,1)
np.dot(a,b)

array([[60]])

#### Number 24 Get cross-product of two Numpy vectors

In [67]:
x = [1,2,3]
y = [4,5,6]
z = np.cross(x,y)
z

array([-3,  6, -3])

#### Number 25 Getting gradient of an array

In [68]:
##Runs the gradient using Taylor series and central difference method
x = np.array([5,10,14,17,19,26], dtype=np.float16)
np.gradient(x)

array([5. , 4.5, 3.5, 2.5, 4.5, 7. ], dtype=float16)

#### Number 26 How to slice NumPy array

In [69]:
x = np.array([[2,4,9],[3,1,5],[7,8,0]])
##slice to get 2,4 7,8
x[[0,0,2,2],[0,1,0,1]]
##to get if rows and columns are continuous
x[[0,2],0:2]
#produces the same result

array([[2, 4],
       [7, 8]])

#### Number 27 Broadcasting

In [70]:
a = np.array([[3,5,8],[4,5,6],[9,7,2]])
b = np.array([2,3,4])

a+b

array([[ 5,  8, 12],
       [ 6,  8, 10],
       [11, 10,  6]])

## Pandas

In [71]:
import pandas as pd
from pandas import Series, DataFrame

#### Number 1 How to read data from a CSV file or a text file?

In [32]:
df = pd.read_csv('imdb_ratings.csv', sep=',', header = 0, index_col=False, names=None)
df

Unnamed: 0,Votes,Rank,Title,Year,Decade
0,88355,8.4,M (1931),1931,1930
1,132823,8.3,Singin' in the Rain (1952),1952,1950
2,74178,8.3,All About Eve (1950),1950,1950
3,635139,8.6,Léon (1994),1994,1990
4,145514,8.2,The Elephant Man (1980),1980,1980
...,...,...,...,...,...
245,1078416,8.7,Forrest Gump (1994),1994,1990
246,31003,8.1,Le salaire de la peur (1953),1953,1950
247,167076,8.2,3 Idiots (2009),2009,2000
248,91689,8.1,Network (1976),1976,1970


#### Number 2 How to create a data frame using a dictionary of pre-existing columns?

In [10]:
#d_dic ={'Votes':Votes, 'Rank':Rank,'Title':Title,'Year':Year,'Decade':Decade} 
#df = pd.DataFrame(data = d_dic)
df = pd.DataFrame(df, columns = ['Votes', 'Rank','Title','Year','Decade'])
df

Unnamed: 0,Votes,Rank,Title,Year,Decade
0,88355,8.4,M (1931),1931,1930
1,132823,8.3,Singin' in the Rain (1952),1952,1950
2,74178,8.3,All About Eve (1950),1950,1950
3,635139,8.6,Léon (1994),1994,1990
4,145514,8.2,The Elephant Man (1980),1980,1980
...,...,...,...,...,...
245,1078416,8.7,Forrest Gump (1994),1994,1990
246,31003,8.1,Le salaire de la peur (1953),1953,1950
247,167076,8.2,3 Idiots (2009),2009,2000
248,91689,8.1,Network (1976),1976,1970


#### Number 3 How to visualize the top and bottom x values in a data frame?

In [11]:
df.head(10)
df.tail(10)

col = df.columns

df[col].head(10)

Unnamed: 0,Votes,Rank,Title,Year,Decade
0,88355,8.4,M (1931),1931,1930
1,132823,8.3,Singin' in the Rain (1952),1952,1950
2,74178,8.3,All About Eve (1950),1950,1950
3,635139,8.6,Léon (1994),1994,1990
4,145514,8.2,The Elephant Man (1980),1980,1980
5,425461,8.3,Full Metal Jacket (1987),1987,1980
6,441174,8.1,Gone Girl (2014),2014,2010
7,850601,8.3,Batman Begins (2005),2005,2000
8,37664,8.2,Judgment at Nuremberg (1961),1961,1960
9,46987,8.0,Relatos salvajes (2014),2014,2010


#### Number 4 How to rename one or more columns?

In [13]:
new_df = df.rename({'Rank':'Best'})
new_df

Unnamed: 0,Votes,Rank,Title,Year,Decade
0,88355,8.4,M (1931),1931,1930
1,132823,8.3,Singin' in the Rain (1952),1952,1950
2,74178,8.3,All About Eve (1950),1950,1950
3,635139,8.6,Léon (1994),1994,1990
4,145514,8.2,The Elephant Man (1980),1980,1980
...,...,...,...,...,...
245,1078416,8.7,Forrest Gump (1994),1994,1990
246,31003,8.1,Le salaire de la peur (1953),1953,1950
247,167076,8.2,3 Idiots (2009),2009,2000
248,91689,8.1,Network (1976),1976,1970


#### Number 5 How to get column names in a list?

In [14]:
df.columns.tolist()

['Votes', 'Rank', 'Title', 'Year', 'Decade']

#### Number 6 How to get the frequency of values in a series?

In [15]:
#df[col].value_counts() -> error 'DataFrame' object has no attribute 'value_counts'
df.apply(pd.value_counts).fillna(0)
#df[col].apply(pd.Series.value_counts)
df[col].count()

Votes     250
Rank      250
Title     250
Year      250
Decade    250
dtype: int64

#### Number 7 How to reset an index to an existing column or another list or array?

In [16]:
new_df = df.reset_index(drop=True,inplace=False)
new_df

Unnamed: 0,Votes,Rank,Title,Year,Decade
0,88355,8.4,M (1931),1931,1930
1,132823,8.3,Singin' in the Rain (1952),1952,1950
2,74178,8.3,All About Eve (1950),1950,1950
3,635139,8.6,Léon (1994),1994,1990
4,145514,8.2,The Elephant Man (1980),1980,1980
...,...,...,...,...,...
245,1078416,8.7,Forrest Gump (1994),1994,1990
246,31003,8.1,Le salaire de la peur (1953),1953,1950
247,167076,8.2,3 Idiots (2009),2009,2000
248,91689,8.1,Network (1976),1976,1970


#### Number 8 How to remove a column?

In [17]:
df.drop(columns = 'Decade')

Unnamed: 0,Votes,Rank,Title,Year
0,88355,8.4,M (1931),1931
1,132823,8.3,Singin' in the Rain (1952),1952
2,74178,8.3,All About Eve (1950),1950
3,635139,8.6,Léon (1994),1994
4,145514,8.2,The Elephant Man (1980),1980
...,...,...,...,...
245,1078416,8.7,Forrest Gump (1994),1994
246,31003,8.1,Le salaire de la peur (1953),1953
247,167076,8.2,3 Idiots (2009),2009
248,91689,8.1,Network (1976),1976


#### Number 9 How to change the index in a data frame?

In [22]:
df.set_index('Votes',inplace = True)

#### Number 10 How to remove rows or columns if they have nan values?

In [23]:
df.dropna(axis=0,inplace = True)

#### Number 11 How to slice a data frame given a condition?

In [26]:
mask = df['Rank'] == 8.4
result = df[mask]
result

Unnamed: 0,Votes,Rank,Title,Year,Decade
0,88355,8.4,M (1931),1931,1930
15,192165,8.4,Mononoke-hime (1997),1997,1990
19,229533,8.4,Das Leben der Anderen (2006),2006,2000
38,532559,8.4,The Shining (1980),1980,1980
58,195266,8.4,Once Upon a Time in America (1984),1984,1980
82,198795,8.4,North by Northwest (1959),1959,1950
84,556811,8.4,Star Wars: Episode VI - Return of the Jedi (1983),1983,1980
95,436218,8.4,Aliens (1986),1986,1980
99,474068,8.4,Le fabuleux destin d'Amélie Poulain (2001),2001,2000
104,735056,8.4,American Beauty (1999),1999,1990


#### Number 12 How to slice a data frame given names of columns or index values of rows?

In [27]:
df.iat[5,2]

'Full Metal Jacket (1987)'

#### Number 13 How to iterate over rows?

In [28]:
for row in df.itertuples():
    print(row.Title)

M (1931)
Singin' in the Rain (1952)
All About Eve (1950)
Léon (1994)
The Elephant Man (1980)
Full Metal Jacket (1987)
Gone Girl (2014)
Batman Begins (2005)
Judgment at Nuremberg (1961)
Relatos salvajes (2014)
It Happened One Night (1934)
Gran Torino (2008)
Some Like It Hot (1959)
Inglourious Basterds (2009)
The Princess Bride (1987)
Mononoke-hime (1997)
Saving Private Ryan (1998)
Mr. Smith Goes to Washington (1939)
Butch Cassidy and the Sundance Kid (1969)
Das Leben der Anderen (2006)
In the Name of the Father (1993)
The Grapes of Wrath (1940)
The Godfather (1972)
Bom yeoreum gaeul gyeoul geurigo bom (2003)
V for Vendetta (2005)
Mary and Max (2009)
Warrior (2011)
Intouchables (2011)
La battaglia di Algeri (1966)
The Sting (1973)
Shutter Island (2010)
Jaws (1975)
Before Sunrise (1995)
2001: A Space Odyssey (1968)
12 Years a Slave (2013)
The Killing (1956)
Papillon (1973)
Lawrence of Arabia (1962)
The Shining (1980)
Cool Hand Luke (1967)
A Beautiful Mind (2001)
Inside Out (2015/I)
Dr. St

#### Number 14 How to sort by a column?

In [29]:
df
df.sort_values(['Rank'], ascending=False)

Unnamed: 0,Votes,Rank,Title,Year,Decade
22,1027398,9.2,The Godfather (1972),1972,1970
53,1498733,9.2,The Shawshank Redemption (1994),1994,1990
91,692753,9.0,The Godfather: Part II (1974),1974,1970
105,384187,8.9,12 Angry Men (1957),1957,1950
57,447875,8.9,"Il buono, il brutto, il cattivo (1966)",1966,1960
...,...,...,...,...,...
168,500576,8.0,"Monsters, Inc. (2001)",2001,2000
166,59578,8.0,The Big Sleep (1946),1946,1940
46,427099,8.0,X-Men: Days of Future Past (2014),2014,2010
51,87437,8.0,Roman Holiday (1953),1953,1950


#### Number 15 How to apply a function to each element to a series?

In [78]:
df[['Rank']].apply(np.sum, axis=1)

0      8.4
1      8.3
2      8.3
3      8.6
4      8.2
      ... 
245    8.7
246    8.1
247    8.2
248    8.1
249    8.3
Length: 250, dtype: float64

#### Number 16 How to apply a function to all elements in a data frame?

In [77]:
##was a little confused on this one but finally got it
new_df = df.applymap(lambda x: str(x) + '_X')
new_df

Unnamed: 0,Votes,Rank,Title,Year,Decade
0,88355_X,8.4_X,M (1931)_X,1931_X,1930_X
1,132823_X,8.3_X,Singin' in the Rain (1952)_X,1952_X,1950_X
2,74178_X,8.3_X,All About Eve (1950)_X,1950_X,1950_X
3,635139_X,8.6_X,Léon (1994)_X,1994_X,1990_X
4,145514_X,8.2_X,The Elephant Man (1980)_X,1980_X,1980_X
...,...,...,...,...,...
245,1078416_X,8.7_X,Forrest Gump (1994)_X,1994_X,1990_X
246,31003_X,8.1_X,Le salaire de la peur (1953)_X,1953_X,1950_X
247,167076_X,8.2_X,3 Idiots (2009)_X,2009_X,2000_X
248,91689_X,8.1_X,Network (1976)_X,1976_X,1970_X


#### Number 17 How to slice a data frame if values of a series lie in a list?

In [34]:
mask = df[df['Rank'].isin([8.4])]
mask

Unnamed: 0,Votes,Rank,Title,Year,Decade
0,88355,8.4,M (1931),1931,1930
15,192165,8.4,Mononoke-hime (1997),1997,1990
19,229533,8.4,Das Leben der Anderen (2006),2006,2000
38,532559,8.4,The Shining (1980),1980,1980
58,195266,8.4,Once Upon a Time in America (1984),1984,1980
82,198795,8.4,North by Northwest (1959),1959,1950
84,556811,8.4,Star Wars: Episode VI - Return of the Jedi (1983),1983,1980
95,436218,8.4,Aliens (1986),1986,1980
99,474068,8.4,Le fabuleux destin d'Amélie Poulain (2001),2001,2000
104,735056,8.4,American Beauty (1999),1999,1990


#### Number 18 How to group-by column values and aggregate over another column or apply a function to it?

In [35]:
df.groupby(["Title","Year"]).agg({"Rank":[min]})

Unnamed: 0_level_0,Unnamed: 1_level_0,Rank
Unnamed: 0_level_1,Unnamed: 1_level_1,min
Title,Year,Unnamed: 2_level_2
12 Angry Men (1957),1957,8.9
12 Years a Slave (2013),2013,8.1
2001: A Space Odyssey (1968),1968,8.3
3 Idiots (2009),2009,8.2
8½ (1963),1963,8.1
...,...,...
Who's Afraid of Virginia Woolf? (1966),1966,8.0
Witness for the Prosecution (1957),1957,8.3
X-Men: Days of Future Past (2014),2014,8.0
Yip Man (2008),2008,8.0


#### Number 19 How to create duplicates for other columns for each element in a list of a particular column?

In [36]:
df['Rank'].explode()

0      8.4
1      8.3
2      8.3
3      8.6
4      8.2
      ... 
245    8.7
246    8.1
247    8.2
248    8.1
249    8.3
Name: Rank, Length: 250, dtype: float64

#### Number 20 How to concatenate two data frames?

In [37]:
df2 = pd.read_csv('imdb_ratings.csv', sep=',', header = 0, index_col=False, names=None)
result = pd.concat([df,df2],axis=1)
result

Unnamed: 0,Votes,Rank,Title,Year,Decade,Votes.1,Rank.1,Title.1,Year.1,Decade.1
0,88355,8.4,M (1931),1931,1930,88355,8.4,M (1931),1931,1930
1,132823,8.3,Singin' in the Rain (1952),1952,1950,132823,8.3,Singin' in the Rain (1952),1952,1950
2,74178,8.3,All About Eve (1950),1950,1950,74178,8.3,All About Eve (1950),1950,1950
3,635139,8.6,Léon (1994),1994,1990,635139,8.6,Léon (1994),1994,1990
4,145514,8.2,The Elephant Man (1980),1980,1980,145514,8.2,The Elephant Man (1980),1980,1980
...,...,...,...,...,...,...,...,...,...,...
245,1078416,8.7,Forrest Gump (1994),1994,1990,1078416,8.7,Forrest Gump (1994),1994,1990
246,31003,8.1,Le salaire de la peur (1953),1953,1950,31003,8.1,Le salaire de la peur (1953),1953,1950
247,167076,8.2,3 Idiots (2009),2009,2000,167076,8.2,3 Idiots (2009),2009,2000
248,91689,8.1,Network (1976),1976,1970,91689,8.1,Network (1976),1976,1970


#### Number 21 How to merge two data frames?

In [38]:
df.merge(df2, on=['Rank'], how='inner')

Unnamed: 0,Votes_x,Rank,Title_x,Year_x,Decade_x,Votes_y,Title_y,Year_y,Decade_y
0,88355,8.4,M (1931),1931,1930,88355,M (1931),1931,1930
1,88355,8.4,M (1931),1931,1930,192165,Mononoke-hime (1997),1997,1990
2,88355,8.4,M (1931),1931,1930,229533,Das Leben der Anderen (2006),2006,2000
3,88355,8.4,M (1931),1931,1930,532559,The Shining (1980),1980,1980
4,88355,8.4,M (1931),1931,1930,195266,Once Upon a Time in America (1984),1984,1980
...,...,...,...,...,...,...,...,...,...
9171,1177098,8.8,Fight Club (1999),1999,1990,1177098,Fight Club (1999),1999,1990
9172,1177098,8.8,Fight Club (1999),1999,1990,1099087,The Lord of the Rings: The Fellowship of the R...,2001,2000
9173,1099087,8.8,The Lord of the Rings: The Fellowship of the R...,2001,2000,1177098,Fight Club (1999),1999,1990
9174,1099087,8.8,The Lord of the Rings: The Fellowship of the R...,2001,2000,1099087,The Lord of the Rings: The Fellowship of the R...,2001,2000


# Additional Questions
### What are some useful things you can you do with numpy and pandas?
     Pandas and Numpy actually have some really cool operations. NumPy has cool implmentations for matrix multiplication and matrix manipulation. Pandas lets you manipulate csv files and create data frames which can be useful for analyzing large data sets and creating custom data frames.
### Why would those useful things be helpful in data analysis?
    Data analysis is all about analyzing data sets that can be tricky to understand in their raw format. Using these operations can help to clean, organize and manipulate data sets to better understand information.
### What challenges did you have in following the examples?
    My biggest challenge was learning the difference between pseudo-code and atucal code. It was sometimes confusing about what the operations were saying. I also noticed that for the Pandas section it jumped back and forth between a Series and a DataFrame so I had to do a bit of troubleshooting to make the csv file we were using to work for either operation. I am not entirley sure if I did each example correctly but I did what I thought was the outcome and tried my best to come ot the proper expression with solution. 