In [28]:
# Import Dependencies
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.linear_model import LinearRegression
import numpy as np

In [2]:
# Read in the files
consolidated_file=pd.read_csv('Resources/consolidated_coin_data.csv')
# crypto_prices=pd.read_csv('Resources/crypto_prices.csv')
crypto_markets=pd.read_csv('Resources/crypto-markets.csv')

In [3]:
# Convert to DataFrame
consolidated_df=pd.DataFrame(consolidated_file)
crypto_df=pd.DataFrame(crypto_markets)

In [4]:
# Drop out some columns from crypto_df
crypto_df=crypto_df[['name','date','open','high','low','close','volume','market','spread']]

In [5]:
# Convert the data from last 6 columns
column_list=['Open','High','Low','Close','Volume','Market Cap']
for x in column_list:
    consolidated_df[x] = consolidated_df[x].str.replace(',', '').astype(float)

In [6]:
# Rename the columns from the consolidated_df
consolidated_df=consolidated_df.rename(columns={'Currency':'name','Open':'open','High':'high','Date':'date','Low':'low','Close':'close','Volume':'volume','Market Cap':'market'})

In [7]:
# Double check the type for the DataFrame
consolidated_df.dtypes

name       object
date       object
open      float64
high      float64
low       float64
close     float64
volume    float64
market    float64
dtype: object

In [8]:
# Display the newly updated DataFrame
consolidated_df

Unnamed: 0,name,date,open,high,low,close,volume,market
0,tezos,"Dec 04, 2019",1.29,1.32,1.25,1.25,46048752.0,824588509.0
1,tezos,"Dec 03, 2019",1.24,1.32,1.21,1.29,41462224.0,853213342.0
2,tezos,"Dec 02, 2019",1.25,1.26,1.20,1.24,27574097.0,817872179.0
3,tezos,"Dec 01, 2019",1.33,1.34,1.25,1.25,24127567.0,828296390.0
4,tezos,"Nov 30, 2019",1.31,1.37,1.31,1.33,28706667.0,879181680.0
...,...,...,...,...,...,...,...,...
28939,bitcoin-sv,"May 02, 2013",3.78,4.04,3.01,3.37,0.0,58287979.0
28940,bitcoin-sv,"May 01, 2013",4.29,4.36,3.52,3.80,0.0,65604596.0
28941,bitcoin-sv,"Apr 30, 2013",4.40,4.57,4.17,4.30,0.0,74020918.0
28942,bitcoin-sv,"Apr 29, 2013",4.37,4.57,4.23,4.38,0.0,75388964.0


In [9]:
# Set the index to Currency column 
consolidated_df=consolidated_df.set_index('name')

In [10]:
# Filter out for just bitcoin
btc_df=consolidated_df.loc['bitcoin']

In [11]:
# Display the btc_df
btc_df

Unnamed: 0_level_0,date,open,high,low,close,volume,market
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
bitcoin,"Dec 04, 2019",7320.13,7539.78,7170.92,7252.03,2.166424e+10,1.311431e+11
bitcoin,"Dec 03, 2019",7323.98,7418.86,7229.36,7320.15,1.479749e+10,1.323599e+11
bitcoin,"Dec 02, 2019",7424.04,7474.82,7233.40,7321.99,1.708204e+10,1.323790e+11
bitcoin,"Dec 01, 2019",7571.62,7571.62,7291.34,7424.29,1.872071e+10,1.342151e+11
bitcoin,"Nov 30, 2019",7764.06,7836.10,7515.85,7569.63,1.715819e+10,1.368294e+11
...,...,...,...,...,...,...,...
bitcoin,"May 02, 2013",116.38,125.60,92.28,105.21,0.000000e+00,1.168517e+09
bitcoin,"May 01, 2013",139.00,139.89,107.72,116.99,0.000000e+00,1.298955e+09
bitcoin,"Apr 30, 2013",144.00,146.93,134.05,139.00,0.000000e+00,1.542813e+09
bitcoin,"Apr 29, 2013",134.44,147.49,134.00,144.54,0.000000e+00,1.603769e+09


In [12]:
# Convert the date to Mm-yyyy format
btc_df['month-year']=pd.to_datetime(btc_df['date']).dt.to_period('M')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [13]:
# Display the cleaned btc_df
btc_df

Unnamed: 0_level_0,date,open,high,low,close,volume,market,month-year
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
bitcoin,"Dec 04, 2019",7320.13,7539.78,7170.92,7252.03,2.166424e+10,1.311431e+11,2019-12
bitcoin,"Dec 03, 2019",7323.98,7418.86,7229.36,7320.15,1.479749e+10,1.323599e+11,2019-12
bitcoin,"Dec 02, 2019",7424.04,7474.82,7233.40,7321.99,1.708204e+10,1.323790e+11,2019-12
bitcoin,"Dec 01, 2019",7571.62,7571.62,7291.34,7424.29,1.872071e+10,1.342151e+11,2019-12
bitcoin,"Nov 30, 2019",7764.06,7836.10,7515.85,7569.63,1.715819e+10,1.368294e+11,2019-11
...,...,...,...,...,...,...,...,...
bitcoin,"May 02, 2013",116.38,125.60,92.28,105.21,0.000000e+00,1.168517e+09,2013-05
bitcoin,"May 01, 2013",139.00,139.89,107.72,116.99,0.000000e+00,1.298955e+09,2013-05
bitcoin,"Apr 30, 2013",144.00,146.93,134.05,139.00,0.000000e+00,1.542813e+09,2013-04
bitcoin,"Apr 29, 2013",134.44,147.49,134.00,144.54,0.000000e+00,1.603769e+09,2013-04


In [14]:
# Lower the values in the name column
crypto_df['name']=crypto_df['name'].str.lower()

In [15]:
# Convert date to MM-yyyy
crypto_df['month-year']=pd.to_datetime(crypto_df['date']).dt.to_period('M')

In [16]:
# Display the crypto_df
crypto_df

Unnamed: 0,name,date,open,high,low,close,volume,market,spread,month-year
0,bitcoin,2013-04-28,135.30,135.98,132.10,134.21,0.0,1.488567e+09,3.88,2013-04
1,bitcoin,2013-04-29,134.44,147.49,134.00,144.54,0.0,1.603769e+09,13.49,2013-04
2,bitcoin,2013-04-30,144.00,146.93,134.05,139.00,0.0,1.542813e+09,12.88,2013-04
3,bitcoin,2013-05-01,139.00,139.89,107.72,116.99,0.0,1.298955e+09,32.17,2013-05
4,bitcoin,2013-05-02,116.38,125.60,92.28,105.21,0.0,1.168517e+09,33.32,2013-05
...,...,...,...,...,...,...,...,...,...,...
942292,project-x,2018-11-26,22424.60,23049.30,17607.50,20724.80,143.0,1.622000e+03,5441.80,2018-11
942293,project-x,2018-11-27,20711.90,23742.10,19292.90,23298.70,637.0,1.823000e+03,4449.20,2018-11
942294,project-x,2018-11-28,23320.90,25235.10,19888.10,24442.70,49.0,1.913000e+03,5347.00,2018-11
942295,project-x,2018-11-29,24543.53,24582.47,21457.95,22839.44,153.0,1.788000e+03,3124.52,2018-11


In [17]:
# Set the index to name
crypto_df=crypto_df.set_index('name')

In [18]:
# Filter out the bitcoin
btc1_df=crypto_df.loc['bitcoin']

In [19]:
# Display the btc1_df
btc1_df

Unnamed: 0_level_0,date,open,high,low,close,volume,market,spread,month-year
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
bitcoin,2013-04-28,135.30,135.98,132.10,134.21,0.000000e+00,1.488567e+09,3.88,2013-04
bitcoin,2013-04-29,134.44,147.49,134.00,144.54,0.000000e+00,1.603769e+09,13.49,2013-04
bitcoin,2013-04-30,144.00,146.93,134.05,139.00,0.000000e+00,1.542813e+09,12.88,2013-04
bitcoin,2013-05-01,139.00,139.89,107.72,116.99,0.000000e+00,1.298955e+09,32.17,2013-05
bitcoin,2013-05-02,116.38,125.60,92.28,105.21,0.000000e+00,1.168517e+09,33.32,2013-05
...,...,...,...,...,...,...,...,...,...
bitcoin,2018-11-25,3880.78,4120.87,3585.06,4009.97,6.825640e+09,6.974927e+10,535.81,2018-11
bitcoin,2018-11-26,4015.07,4107.14,3643.92,3779.13,6.476900e+09,6.573929e+10,463.22,2018-11
bitcoin,2018-11-27,3765.95,3862.96,3661.01,3820.72,5.998720e+09,6.646897e+10,201.95,2018-11
bitcoin,2018-11-28,3822.47,4385.90,3822.47,4257.42,7.280280e+09,7.407256e+10,563.43,2018-11


In [25]:
# Join two dataframe
final_df=pd.concat([btc_df,btc1_df])

In [40]:

new_df=final_df[~pd.isnull(final_df['spread'])]

In [41]:
new_df

Unnamed: 0_level_0,date,open,high,low,close,volume,market,month-year,spread
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
bitcoin,2013-04-28,135.30,135.98,132.10,134.21,0.000000e+00,1.488567e+09,2013-04,3.88
bitcoin,2013-04-29,134.44,147.49,134.00,144.54,0.000000e+00,1.603769e+09,2013-04,13.49
bitcoin,2013-04-30,144.00,146.93,134.05,139.00,0.000000e+00,1.542813e+09,2013-04,12.88
bitcoin,2013-05-01,139.00,139.89,107.72,116.99,0.000000e+00,1.298955e+09,2013-05,32.17
bitcoin,2013-05-02,116.38,125.60,92.28,105.21,0.000000e+00,1.168517e+09,2013-05,33.32
...,...,...,...,...,...,...,...,...,...
bitcoin,2018-11-25,3880.78,4120.87,3585.06,4009.97,6.825640e+09,6.974927e+10,2018-11,535.81
bitcoin,2018-11-26,4015.07,4107.14,3643.92,3779.13,6.476900e+09,6.573929e+10,2018-11,463.22
bitcoin,2018-11-27,3765.95,3862.96,3661.01,3820.72,5.998720e+09,6.646897e+10,2018-11,201.95
bitcoin,2018-11-28,3822.47,4385.90,3822.47,4257.42,7.280280e+09,7.407256e+10,2018-11,563.43


In [None]:
# Select the X and y for the machine
X=consolidated_df.drop([],axis=1)
y=consolidated_df['Average']

In [None]:
print(X.shape,y.shape)

In [None]:
# Create a Linear Regression model
model = LinearRegression()

In [None]:
# Fit in the model
model.fit(X,y)

In [None]:
print(model)

In [None]:
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_) 