# Matplotlib Example

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Sample the parabola y = x**2 at every integer x from -10 to 10 (inclusive).
x = list(range(-10, 11))
# A list comprehension builds the squares in one pass; it replaces the manual
# "start with an empty list and append inside a for loop" pattern.
y = [i * i for i in x]

# Equivalent alternatives:
#   y = [i**2 for i in x]
#   y = list(map(lambda i: i**2, x))
# Careful: [lambda i: i**2 for i in x] does NOT compute the squares -- it
# builds a list of uncalled lambda functions, one per element of x.

# Draw the sampled parabola as a green line and give the figure a title
# (the $...$ part of the title is rendered as math text).
plt.plot(x, y, color="green")  # "color" is the long spelling of the "c" keyword
plt.title("polynomial $y=x^2$")


# Plot directly from a Pandas DataFrame

In [None]:
import pandas as pd
# Plot the same parabola through pandas: x becomes the row index and y the
# single data column, so DataFrame.plot() draws y against x.
# An alternative construction is pd.DataFrame([x, y]).transpose(), which
# stores x and y as two ordinary columns instead (note the call parentheses
# on transpose()).
df = pd.DataFrame(y, index=x)
df.plot(legend=None)  # legend=None suppresses the legend box


# Making an animated plot

In [None]:
# Load the Gapminder life-expectancy workbook and use its "Life expectancy"
# column as the row index; the remaining columns are addressed by year below
# (e.g. df[1800]).
# NOTE(review): the absolute path is machine-specific -- point it at wherever
# the workbook lives on your system.
df = pd.read_excel("/home/spiced/Downloads/gapminder_lifeexpectancy.xlsx")
df = df.set_index("Life expectancy")

year = 1800
# Round to the nearest whole number instead of truncating with int()
# (int(31.9) == 31); this is also how the animated version of this plot
# computes the average shown in its frame titles.
avg_life = round(df[year].mean())

# Histogram for the year 1800, saved as a PNG frame.
df[year].hist()
# Same title via str.format():
#   "Year: {0}, Avg. Life Exp: {1}".format(year, avg_life)
plt.title(f"Year: {year}, Avg. Life Exp: {avg_life}")
plt.savefig(f"frame_{year}.png")

In [None]:
import imageio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Build an animated GIF of the life-expectancy histogram, one frame per
# sampled year.
# NOTE(review): the absolute path is machine-specific -- point it at wherever
# the workbook lives on your system.
df = pd.read_excel("/home/spiced/Downloads/gapminder_lifeexpectancy.xlsx")
df.set_index("Life expectancy", inplace=True)

# Step 1: the remaining column labels are the years (1800 to 2016).
years = list(df.columns)

# Step 2: render one histogram per sampled year. Sampling every 20th year
# keeps the loop fast; use range(years[0], years[-1] + 1) to render them all.
images = []
# The loop variable is called `year` (not `y`) so it does not overwrite the
# `y` list of squares defined in the first cell of this notebook.
for year in range(years[0], years[-1] + 1, 20):
    fig = plt.figure()  # fresh figure per frame, so histograms never overlap
    avg_life = round(df[year].mean())
    df[year].hist()
    # Same title via str.format():
    #   "Year: {0}, Avg. Life Exp: {1}".format(year, avg_life)
    plt.title(f"Year: {year}, Avg. Life Exp: {avg_life}")
    plt.axis([0, 100, 0, 100])  # identical axes on every frame
    frame_path = f"frame_{year}.png"
    plt.savefig(frame_path)
    # Close the figure once it is on disk so open figures do not pile up in
    # memory. Remove this line if you also want every frame shown inline.
    plt.close(fig)

    # Read the saved frame back as an array and collect it for the GIF.
    # NOTE(review): recent imageio releases may warn that imread/mimsave live
    # in imageio.v2 and that GIF `fps` is superseded by `duration` -- confirm
    # against the installed version.
    images.append(imageio.imread(frame_path))

# Step 3: write the GIF once, after all frames have been collected (writing
# it inside the loop would re-encode the whole animation on every iteration).
imageio.mimsave("life_expectancy.gif", images, fps=2)

# Afternoon Course with Kristian

In [None]:
import pandas as pd
# Course roster, one (first name, country, last-name initial) record per
# participant; the records are split column-wise to build the DataFrame.
_roster = [
    ('Justin', 'USA', 'B'),
    ('Diana', 'Armenia', 'M'),
    ('Carolina', 'Brazil', 'C'),
    ('Anita', 'Nigeria', 'O'),
    ('Nadja', 'Germany', 'A'),
    ('Heiki', 'Germany', 'Q'),
    ('Felix', 'Germany', 'F'),
    ('Tolga', 'Turkey', 'S'),
    ('Artem', 'Russia', 'P'),
    ('Josue', 'USA', 'M'),
    ('Kristian', 'Finland', 'R'),
]
df = pd.DataFrame({
    column: [person[position] for person in _roster]
    for position, column in enumerate(('names', 'country', 'last_name'))
})

In [None]:
# Preview the first rows of the roster (head() shows 5 rows by default).
df.head()

In [None]:
# Label-based row lookup: index the roster by first name, then select rows by
# name with .loc. The name index is built inline here because `df2` is only
# created in a later cell, so referencing it at this point raises NameError
# when the notebook is run from top to bottom.
df.set_index('names').loc[['Felix', 'Diana']]

# Move one column to the index

In [None]:
# Promote the 'names' column to the row index. set_index() returns a new
# frame, so `df` itself keeps its default integer index.
df2 = df.set_index(keys='names')
df2.head(n=3)

# Move index to a column

In [None]:
# Turn the index back into an ordinary column. reset_index() returns a new
# frame and leaves df2 untouched, so re-running this cell is harmless. The
# in-place form, df2.reset_index(inplace=True), modifies df2 itself and keeps
# adding index columns every time the cell is re-run.
df3 = df2.reset_index(drop=False)
df3.head(n=3)

# Groupby function

groupby consists of 3 parts:

* column(s) to group by
* column(s) that you select
* aggregation function

In [None]:
# Count entries per country. Double brackets ([['last_name']]) select a
# one-column DataFrame, so the result is a DataFrame; single brackets
# (['last_name']) would select a Series and give a Series back instead.
df2.groupby('country')[['last_name']].count()

In [None]:
# Add the length of each first name as a new column.
# The lengths are computed from df2's own index (the names), so they line up
# with df2's rows by construction. Taking them from another frame, e.g.
# df['names'].str.len(), yields a Series labelled 0..n-1; assigning that to
# df2 aligns on labels (names vs. integers) and fills the column with NaN
# unless the labels are stripped with .values -- and even then the result is
# only correct while both frames share the same row order.
df2['nchars'] = df2.index.str.len()
df2.head(3)

In [None]:
# Summarise name lengths per country.
# A notebook cell only renders the value of its last expression, so the mean
# table is printed explicitly; as a bare expression on the first line it
# would be computed and silently discarded.
nchars_by_country = df2.groupby('country')[['nchars']]
print(nchars_by_country.mean())
nchars_by_country.describe()

# Create a two-part index (hierarchical index)

In [None]:
# Group by two keys at once: the result has a two-level (hierarchical) row
# index of (nchars, country) and counts the entries in each combination.
df5 = df2.groupby(by=['nchars', 'country'])[['last_name']].count()
df5

In [None]:
# Select a row of the two-level index by passing a list of tuples to .loc.
# Each tuple holds one value per index level -- here (nchars, country) -- and
# can contain more than two elements when the index has more levels.
df5.loc[[(8,'Finland')]]


# Long and Wide data format

In [None]:
# to see the figure after running the code, we have to add next line at the beginnings of our codes everytime!
%matplotlib inline 
df6=df5.unstack().fillna(0.0).astype(int)['last_name'] # '.fillna() func. is changing the 'Nan' values with '0.0'
# 'astype(int) convert values from float to int'
df6.plot.bar()

In [None]:
# unstack(level) chooses which index level is moved into the columns. Levels
# are numbered from left to right in the order the keys were passed to
# groupby(): here 0 is nchars and 1 is country (the default, -1, is the last
# level). Grouping by four keys would allow levels 0, 1, 2 or 3.
# fill_value=0 writes 0 for combinations that do not occur, so the counts stay
# integers without a fillna()/astype(int) round trip through NaN/float.
df7 = df5.unstack(0, fill_value=0)
# Compare the legend with the previous figure: this frame keeps the outer
# 'last_name' column level (it is not selected away), so the labels differ.
df7.plot.bar()