# G-Research Crypto Forecasting
# Test challenge
## Goal: Use your ML expertise to predict real crypto market data.
### This notebook focuses mainly on time series analysis

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline
from pandas_datareader.data import DataReader
from datetime import datetime
import time

In [None]:
# Load the asset metadata table and print the asset names it contains.
asset_details_path = '../input/g-research-crypto-forecasting/asset_details.csv'
df = pd.read_csv(asset_details_path)
asset_names = df['Asset_Name'].tolist()
print(asset_names)

In [None]:
# Names of the crypto assets analysed in the rest of the notebook.
cypto_list = [
    'Bitcoin Cash',
    'Binance Coin',
    'Bitcoin',
    'EOS.IO',
    'Ethereum Classic',
    'Ethereum',
    'Litecoin',
    'Monero',
    'TRON',
    'Stellar',
    'Cardano',
    'IOTA',
    'Maker',
    'Dogecoin',
]



In [None]:
# Evaluate the asset-details frame as the cell's last expression so the
# notebook renders it.
df

In [None]:
# Load the training data and preview its first rows.
train_path = '../input/g-research-crypto-forecasting/train.csv'
df1 = pd.read_csv(train_path)
df1.head()

In [None]:
# Lookup table from Asset_ID to Asset_Name, built from the asset-details frame.
asset_ids = df['Asset_ID'].tolist()
asset_labels = df['Asset_Name'].tolist()
crypto_id = dict(zip(asset_ids, asset_labels))

In [None]:
# Attach a readable asset name and a real datetime to every training row.
# Series.map performs the Asset_ID -> name lookup in one vectorised pass
# instead of a Python-level loop over every row.
df1['Asset_Name'] = df1['Asset_ID'].map(crypto_id)
if df1['Asset_Name'].isna().any():
    # Keep the original failure mode: an Asset_ID without a known name is an
    # error, not a silently missing label.
    unknown_ids = sorted(df1.loc[df1['Asset_Name'].isna(), 'Asset_ID'].unique())
    raise KeyError(f"Asset_ID values missing from asset details: {unknown_ids}")

# Parse the epoch seconds into timezone-naive UTC datetimes rather than
# time.ctime() strings: ctime text depends on the machine's local timezone and
# sorts/plots as arbitrary strings instead of points on a time axis.
df1['Date'] = pd.to_datetime(df1['timestamp'], unit='s')

In [None]:
# Use the 'Date' column as the row index (the column itself is kept, so
# 'Date' is both the index name and a column label), then preview the result.
df1.index = df1['Date']
df1.head()

In [None]:
# Summary statistics for the Dogecoin rows.
# `Dogecoin` holds the filtered rows themselves (not a describe() table), so
# describe() below summarises the data rather than a summary of a summary.
Dogecoin = df1[df1['Asset_Name'] == 'Dogecoin']
Dogecoin.describe()

In [None]:
# Print a concise summary (columns, non-null counts, dtypes) of the frame
# currently bound to `Dogecoin`.
Dogecoin.info()

In [None]:
# Historical view of the closing price: one subplot per asset, plotting every
# row available for that asset (no row limit is applied).
n_assets = len(cypto_list)
plt.figure(figsize=(18, 128))

for i, crypto in enumerate(cypto_list, 1):
    plt.subplot(n_assets, 1, i)
    cypto_df = df1[df1['Asset_Name'] == crypto]
    cypto_df['Close'].plot()
    plt.ylabel('Close')
    plt.xlabel(None)
    plt.title(crypto)

# Lay the figure out once, after all subplots exist, instead of re-running the
# layout on every iteration.
plt.tight_layout()

In [None]:
# Historical view of the traded volume: one subplot per asset, plotting every
# row available for that asset (no row limit is applied).
n_assets = len(cypto_list)
plt.figure(figsize=(18, 128))

for i, crypto in enumerate(cypto_list, 1):
    plt.subplot(n_assets, 1, i)
    cypto_df = df1[df1['Asset_Name'] == crypto]
    cypto_df['Volume'].plot()
    plt.ylabel('Volume')
    plt.xlabel(None)
    plt.title(crypto)

# Lay the figure out once, after all subplots exist, instead of re-running the
# layout on every iteration.
plt.tight_layout()

In [None]:
# Moving averages of the Dogecoin closing price.
# Each window length counts rows of the filtered frame.
# NOTE(review): the column names say "days", which is only accurate if the
# data has one row per day — confirm the sampling interval. The names are kept
# because the plotting cell further down selects these exact columns.
ma_day = [10, 20, 50]

crypto = 'Dogecoin'
# Take an explicit copy: adding columns to a filtered view of df1 triggers
# pandas' SettingWithCopyWarning and is not guaranteed to stick.
cypto_df = df1[df1['Asset_Name'] == crypto].copy()
for ma in ma_day:
    column_name = f"MA for {ma} days"
    cypto_df[column_name] = cypto_df['Close'].rolling(ma).mean()

In [None]:
# Show the column labels of the current `cypto_df` frame.
print(cypto_df.columns)

In [None]:
# Draw one histogram per numeric column of `cypto_df` on a 24x24 inch figure.
cypto_df.hist(figsize=(24, 24))

In [None]:
# Overlay the closing price and its three moving averages on one wide axes.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(30, 8))

ma_plot_columns = ['Close', 'MA for 10 days', 'MA for 20 days', 'MA for 50 days']
cypto_df[ma_plot_columns].plot(ax=axes)
axes.set_title('Dogecoin')

fig.tight_layout()

In [None]:
# Row-to-row percentage change of the Dogecoin closing price.
# pct_change() compares each row with the previous one.
# NOTE(review): this is a *daily* return only if the data has one row per
# day — confirm the sampling interval.
crypto = 'Dogecoin'
# Take an explicit copy: adding a column to a filtered view of df1 triggers
# pandas' SettingWithCopyWarning and is not guaranteed to stick.
cypto_df = df1[df1['Asset_Name'] == crypto].copy()

cypto_df['Daily Return'] = cypto_df['Close'].pct_change()

# Plot the return series on a single wide axes.
fig, axes = plt.subplots(nrows=1, ncols=1)
fig.set_figheight(8)
fig.set_figwidth(30)

cypto_df['Daily Return'].plot(ax=axes, legend=True, linestyle='--', marker='o')
axes.set_title('Dogecoin')

fig.tight_layout()

In [None]:
# Distribution of the row-to-row returns, one subplot per asset.
n_assets = len(cypto_list)
plt.figure(figsize=(18, 128))

for i, crypto in enumerate(cypto_list, 1):
    plt.subplot(n_assets, 1, i)
    crypto_df = df1[df1['Asset_Name'] == crypto]
    # Compute the returns as a standalone Series instead of writing a new
    # column onto a filtered view of df1 (SettingWithCopyWarning).
    returns = crypto_df['Close'].pct_change().dropna().rename('Daily Return')
    sns.distplot(returns, bins=100, color='red')
    # The x axis carries the returns; the y axis of the normalised
    # histogram/KDE is a density.
    plt.xlabel('Daily Return')
    plt.ylabel('Density')
    plt.title(crypto)

In [None]:
# Closing prices of every asset side by side, aligned on the timestamp.
# Assets do not all cover the same time span, so the columns must be matched
# by timestamp rather than by row position (which would pair prices from
# unrelated moments and break when an asset has fewer rows than a fixed cap).
closing_df = df1.pivot_table(
    index='timestamp',
    columns='Asset_Name',
    values='Close',
    aggfunc='last',  # if a (timestamp, asset) pair repeats, keep the last row
)
# Keep the notebook's asset order and only the timestamps where every asset
# has a price, so the return and correlation cells compare like with like.
closing_df = closing_df[cypto_list].dropna().rename_axis(columns=None)

In [None]:
# Preview the first rows of the closing-price table.
closing_df.head()

In [None]:
# Build a returns frame: for each column of closing_df, the fractional change
# between each row and the previous one (the first row has no predecessor and
# is NaN). Then preview the first rows.
crypto_rets = closing_df.pct_change()
crypto_rets.head()

In [None]:
# Comparing Dogecoin to itself should show a perfectly linear relationship.
# x, y and data are passed by keyword: recent seaborn releases reject them as
# positional arguments.
sns.jointplot(x='Dogecoin', y='Dogecoin', data=crypto_rets, kind='scatter', color='seagreen')

In [None]:
# Compare the returns of Dogecoin against those of Bitcoin.
# x, y and data are passed by keyword: recent seaborn releases reject them as
# positional arguments.
sns.jointplot(x='Dogecoin', y='Bitcoin', data=crypto_rets, kind='scatter')

In [None]:
# Pairwise regression plots between the return series of all assets.
sns.pairplot(data=crypto_rets, kind='reg')

In [None]:
# Pair grid over the returns (rows with missing values removed); only the
# upper triangle is filled, with purple scatter plots.
complete_returns = crypto_rets.dropna()
return_fig = sns.PairGrid(complete_returns)
return_fig.map_upper(plt.scatter, color='purple')

In [None]:
# Pair grid over the closing prices; only the upper triangle is filled, with
# purple scatter plots.
returns_fig = sns.PairGrid(data=closing_df)
returns_fig.map_upper(plt.scatter, color='purple')

In [None]:
# Annotated heatmap of the correlation matrix of the returns.
returns_corr = crypto_rets.corr()
sns.heatmap(returns_corr, annot=True, cmap='summer')

In [None]:
# Annotated heatmap of the correlation matrix of the closing prices.
closing_corr = closing_df.corr()
sns.heatmap(closing_corr, annot=True, cmap='summer')

In [None]:
# Risk/return scatter: mean return on x, standard deviation of returns on y,
# one labelled point per asset.
rets = crypto_rets.dropna()

area = np.pi*20
expected_return = rets.mean()
risk = rets.std()

plt.figure(figsize=(12, 10))
plt.scatter(expected_return, risk, s=area)
plt.xlabel('Expected return')
plt.ylabel('Risk')

# Label every point with its column name, offset from the marker and joined
# to it by a curved blue connector.
connector_style = dict(arrowstyle='-', color='blue', connectionstyle='arc3,rad=-0.3')
for label in rets.columns:
    point = (expected_return[label], risk[label])
    plt.annotate(
        label,
        xy=point,
        xytext=(50, 50),
        textcoords='offset points',
        ha='right',
        va='bottom',
        arrowprops=connector_style,
    )