# Cryptocurrency Project

### Web Scraping

In [None]:
#Import libraries
import requests 
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta

In [None]:
# Ticker symbols of the cryptocurrencies to download (19 symbols in total)
coin_list = [
    'BTC', 'ETH', 'XRP', 'ADA', 'USDT',
    'DOGE', 'XLM', 'DOT', 'UNI', 'LINK',
    'USDC', 'BCH', 'LTC', 'GRT', 'ETC',
    'FIL', 'AAVE', 'ALGO', 'EOS',
]

In [None]:
# Creating the dataframe
# Empty accumulator frame: the download loop further down adds every coin's rows to it.
main_df = pd.DataFrame()

In [None]:
# Download minute-level OHLC candles for every coin in coin_list and add them to main_df.
#
# The period is walked backwards in 12-hour windows, from datetime_end down to
# datetime_check. Each window is fetched from the CoinDesk API, missing minutes
# are filled with the closest earlier candle, and the rows are labelled with a
# 'Datetime' and a 'Symbol' column.
#
# NOTE: the result is concatenated onto whatever main_df already holds, so
# re-running this cell without re-creating main_df first duplicates the rows.


def _fetch_ohlc(coin, datetime_start, datetime_end):
    """Request one window of OHLC entries for `coin` from the CoinDesk API.

    Parameters
    ----------
    coin : str
        Ticker symbol placed in the request URL.
    datetime_start, datetime_end : datetime
        Bounds of the requested window (formatted as YYYY-mm-ddTHH:MM).

    Returns
    -------
    pandas.DataFrame
        Columns Timestamp, Open, High, Low, Close. The frame has zero rows
        (but still those columns) when the API returns no entries.

    Raises
    ------
    requests.HTTPError
        If the API answers with a non-2xx status.
    KeyError
        If the JSON payload has no ['data']['entries'].
    """
    columns = ['Timestamp', 'Open', 'High', 'Low', 'Close']
    url = ('https://production.api.coindesk.com/v2/price/values/' + coin
           + '?start_date=' + datetime_start.strftime("%Y-%m-%dT%H:%M")
           + '&end_date=' + datetime_end.strftime("%Y-%m-%dT%H:%M")
           + '&ohlc=true')

    # A timeout keeps a stalled connection from hanging the notebook forever;
    # raise_for_status surfaces HTTP errors instead of an obscure KeyError later.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    frame = pd.DataFrame(response.json()['data']['entries'])
    if frame.empty:
        # Assigning 5 column names to a 0-column frame would raise ValueError.
        return pd.DataFrame(columns=columns)
    frame.columns = columns
    return frame


def _fill_minute_gaps(frame, step_ms=60000):
    """Return `frame` with one row every `step_ms` between its first and last Timestamp.

    Sometimes CoinDesk did not collect every minute of data. Each missing
    minute is filled with a copy of the closest earlier row (forward fill).
    Rows are sorted by Timestamp and duplicate Timestamps are dropped first,
    because the re-indexing below needs a unique, increasing index.
    """
    frame = frame.drop_duplicates(subset='Timestamp').sort_values('Timestamp')
    if len(frame) < 2:
        return frame.reset_index(drop=True)

    stamps = frame['Timestamp'].to_numpy()
    n_gaps = int((np.diff(stamps) != step_ms).sum())
    if n_gaps == 0:
        return frame.reset_index(drop=True)

    print(str(n_gaps) + ' mismatched.')
    # Build the complete grid once and forward-fill onto it. Unlike patching one
    # gap per pass, this always terminates, even for irregular spacings.
    full_grid = np.arange(stamps[0], stamps[-1] + step_ms, step_ms)
    return (
        frame.set_index('Timestamp')
             .reindex(full_grid, method='ffill')
             .rename_axis('Timestamp')
             .reset_index()
    )


coin_frames = []
for coin in coin_list:
    # Defining the end of the period and the point at which to stop walking back.
    # datetime_end is reset for every coin because the inner loop moves it.
    datetime_end = datetime(2021, 8, 10, 0, 0)
    datetime_check = datetime(2021, 8, 9, 0, 0)
    coin_chunks = []

    while datetime_end > datetime_check:
        # Never request data from before datetime_check, even if the period is
        # not a whole number of 12-hour windows.
        datetime_start = max(datetime_end - relativedelta(hours=12), datetime_check)

        df = _fetch_ohlc(coin, datetime_start, datetime_end)
        if df.empty:
            # No data for this window: stop going further back for this coin.
            break

        # Adding the datetime to the dataframe.
        # NOTE(review): rows are labelled by counting minutes back from
        # datetime_end, which assumes the window's last candle is the minute
        # just before datetime_end -- confirm against the API's Timestamp values.
        df = _fill_minute_gaps(df).drop(columns=['Timestamp'])
        df['Datetime'] = pd.date_range(
            end=datetime_end - relativedelta(minutes=1), periods=len(df), freq='min'
        )

        coin_chunks.append(df)
        datetime_end = datetime_start

    if not coin_chunks:
        continue

    # Windows were collected newest-first; reverse them into chronological order.
    coin_df = pd.concat(coin_chunks[::-1], ignore_index=True)
    coin_df['Symbol'] = coin
    coin_frames.append(coin_df)

# One concat at the end instead of growing main_df inside the loop
# (DataFrame.append no longer exists in pandas >= 2.0). Empty frames are left
# out so they cannot influence the resulting dtypes.
frames_to_join = [frame for frame in [main_df, *coin_frames] if not frame.empty]
if frames_to_join:
    main_df = pd.concat(frames_to_join, ignore_index=True)



In [None]:
# Put the columns in their final order and renumber the rows from zero
main_df = (
    main_df
    .loc[:, ['Datetime', 'Symbol', 'Open', 'High', 'Low', 'Close']]
    .reset_index(drop=True)
)
main_df

Unnamed: 0,Datetime,Symbol,Open,High,Low,Close
0,2021-08-09 00:00:00,BTC,43803.202992,43876.664382,43802.295647,43876.664382
1,2021-08-09 00:01:00,BTC,43881.158179,43891.948013,43837.187244,43868.453002
2,2021-08-09 00:02:00,BTC,43871.523097,43930.541331,43870.567589,43897.698505
3,2021-08-09 00:03:00,BTC,43904.303248,43967.690329,43904.303248,43935.746009
4,2021-08-09 00:04:00,BTC,43930.494848,43953.111753,43773.230614,43774.193301
...,...,...,...,...,...,...
27355,2021-08-09 23:55:00,EOS,4.539179,4.542146,4.538307,4.541869
27356,2021-08-09 23:56:00,EOS,4.541853,4.541853,4.536734,4.536774
27357,2021-08-09 23:57:00,EOS,4.537165,4.539843,4.536113,4.538551
27358,2021-08-09 23:58:00,EOS,4.538569,4.542766,4.536796,4.536796


In [None]:
# Create csv file
# Writes main_df to 'main_df.csv' (a path relative to the working directory);
# index=False leaves the row index out of the file.
main_df.to_csv('main_df.csv', index = False)