In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.io as pio
import plotly.express as px
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
# NOTE: `dirname`, `filenames` and `filename` are plain module-level names, so after
# the loop they stay bound to the values from the last iteration that ran; if
# os.walk yields nothing they are never defined at all.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory and file name into a full path before printing it.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the data.
# Locate the CSV explicitly instead of relying on a directory name left over from
# an earlier loop: search the input tree for the first directory that actually
# contains the file, and fail with a clear message if it is missing.
DATA_FILE_NAME = 'indexProcessed.csv'
data_path = next(
    (
        os.path.join(root, DATA_FILE_NAME)
        for root, _, files in os.walk('/kaggle/input')
        if DATA_FILE_NAME in files
    ),
    None,
)
if data_path is None:
    raise FileNotFoundError(f"{DATA_FILE_NAME} not found under /kaggle/input")

df = pd.read_csv(data_path)
df.head(10)

In [None]:
# Convert 'Date' to datetime64 so the .dt accessors can be used for time-series
# handling, then derive the calendar year of every row as a separate 'Year' column.
df = (
    df.astype({'Date': 'datetime64[ns]'})
      .assign(Year=lambda frame: frame['Date'].dt.year)
)

In [None]:
# Look up the CloseUSD value for a given Index and Date.
def getPrice(x, df):
    """Return the CloseUSD of the first row in ``df`` matching ``x``'s Index and Date.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must provide the keys 'Index' and 'Date'.
    df : pandas.DataFrame
        Must contain the columns 'Index', 'Date' and 'CloseUSD'.

    Returns
    -------
    The 'CloseUSD' value of the first matching row, or 0 when no row matches.
    """
    same_index = df['Index'] == x['Index']
    same_date = df['Date'] == x['Date']
    matching_closes = df.loc[same_index & same_date, 'CloseUSD'].values
    if len(matching_closes) == 0:
        # No row for this Index/Date pair: fall back to the 0 sentinel.
        return 0
    return matching_closes[0]

In [None]:
# Find the last available date of each year for every Index.
# Selecting the single 'Date' column and taking max() directly yields flat column
# labels, so no column-hierarchy flattening step is needed afterwards.
df_last_date = df.groupby(['Index', 'Year'])['Date'].max().reset_index()

# For each (Index, last date of year) row, look up the matching CloseUSD in the
# main dataframe and store it as 'Price'.
df_last_date['Price'] = df_last_date.apply(getPrice, axis=1, args=(df,))
df_last_date.head(10)

Having sliced the dataframe, I now have the CloseUSD value at the end of each year for each Index, so I am in a position to calculate the YoY return for each Index.

In [None]:
# Order the year-end prices by Index and then Year, and compute each row's
# fractional change in Price relative to the row directly above it.
# Because the change is taken over the whole sorted frame, the first row of an
# Index is compared against the last row of the preceding Index.
df_sorted = (
    df_last_date
    .sort_values(by=['Index', 'Year'])
    .assign(ChangePer=lambda frame: frame['Price'].pct_change())
)

The `pct_change` function gives me the YoY % change for each year. I need to remove the first year of each Index, as those values are calculated against the last value of a different Index.

In [None]:
# Keep only the rows whose previous row is the immediately preceding year of the
# SAME Index.
# The year gap is computed per Index: the first row of every Index gets NaN (and is
# therefore filtered out) even when the previous Index happens to end exactly one
# year earlier, which a plain diff over the whole frame would wrongly keep.
df_sorted['Change_in_year'] = df_sorted.groupby('Index')['Year'].diff()

# Filter and drop the helper columns in one chained expression so df_yoy is a new
# frame; dropping in place on a filtered slice can raise SettingWithCopyWarning.
df_yoy = df_sorted.loc[df_sorted['Change_in_year'] == 1].drop(
    columns=['Date', 'Price', 'Change_in_year']
)
df_yoy

Let's create a time-series animation of the data.

In [None]:
# Build an animated bar chart of the YoY change per Index, one frame per Year.

# df_yoy is ordered by Index first, and Plotly Express orders animation frames by
# first appearance in the data, so sort by Year to make the slider chronological.
yoy_plot = df_yoy.sort_values(['Year', 'Index'])

# Animated figures do not rescale their axes between frames, so pin the y-axis to
# the full data range (always including 0, the bar baseline) plus a small margin.
if yoy_plot.empty:
    y_range = None  # nothing to plot; let plotly pick its default range
else:
    y_low = min(yoy_plot['ChangePer'].min(), 0)
    y_high = max(yoy_plot['ChangePer'].max(), 0)
    margin = 0.05 * (y_high - y_low)
    y_range = [y_low - margin, y_high + margin]

# Passing the frame plus column names lets hover labels and the slider use the
# real column names.
fig = px.bar(
    yoy_plot,
    x='Index',
    y='ChangePer',
    animation_frame='Year',
    range_y=y_range,
)

fig.update_layout(title_text='YoY change of index')
fig.update_xaxes(title_text="<b> Index </b>")
fig.update_yaxes(title_text="<b> YoY change </b>")
fig.show()

In [None]:
d