In [None]:
# Import Dependencies
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.linear_model import LinearRegression
import numpy as np

In [None]:
# Load the two raw CSV exports from the Resources directory
crypto_markets = pd.read_csv('Resources/crypto-markets.csv')
consolidated_file = pd.read_csv('Resources/consolidated_coin_data.csv')
# crypto_prices=pd.read_csv('Resources/crypto_prices.csv')

In [None]:
# Work on independent copies of the loaded tables.
# pd.read_csv already returns DataFrames, and re-wrapping one in pd.DataFrame()
# does not copy its data by default, so later column edits could reach the raw
# frames. An explicit copy keeps consolidated_file / crypto_markets pristine.
consolidated_df = consolidated_file.copy()
crypto_df = crypto_markets.copy()

In [None]:
# Keep only the columns used later in the notebook; everything else in crypto_df is discarded
crypto_df = crypto_df.loc[
    :,
    ['name', 'date', 'open', 'high', 'low', 'close', 'volume', 'market', 'spread'],
]

In [None]:
# Convert the price/volume columns of consolidated_df to float.
# Text values have their thousands separators (',') stripped first. Columns that
# are already numeric skip the string step, because the .str accessor raises on
# non-text data (for example when this cell is run a second time).
column_list = ['Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap']
for column in column_list:
    values = consolidated_df[column]
    if not pd.api.types.is_numeric_dtype(values):
        # regex=False: the comma is a literal character, not a pattern
        values = values.str.replace(',', '', regex=False)
    consolidated_df[column] = values.astype(float)

In [None]:
# Give consolidated_df the same lower-case column names that crypto_df uses
consolidated_df = consolidated_df.rename(
    columns={
        'Currency': 'name',
        'Date': 'date',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
        'Market Cap': 'market',
    }
)

In [None]:
# Inspect the column dtypes of consolidated_df to confirm the numeric conversion above took effect
consolidated_df.dtypes

In [None]:
# Display consolidated_df after the type conversion and the column rename
consolidated_df

In [None]:
# Index consolidated_df by coin name (the former 'Currency' column, renamed to 'name' above)
consolidated_df = consolidated_df.set_index(keys='name')

In [None]:
# Select the rows whose index label is 'bitcoin'.
# A new column is added to btc_df a few cells below, so take an explicit copy:
# writing to a bare .loc selection of consolidated_df triggers
# SettingWithCopyWarning and may or may not touch the parent frame.
btc_df = consolidated_df.loc['bitcoin'].copy()

In [None]:
# Display btc_df (the 'bitcoin' rows selected from consolidated_df)
btc_df

In [None]:
# Tag each bitcoin row with its calendar month as a monthly pandas Period,
# derived from the parsed 'date' column.
# assign() builds a new frame instead of writing a column onto what may be a
# slice of consolidated_df, so no SettingWithCopyWarning is raised and the cell
# can be re-run safely.
# NOTE(review): crypto_df names its equivalent column 'month_year' (underscore);
# the hyphenated name is kept here because a later cell drops 'month-year'.
btc_df = btc_df.assign(
    **{'month-year': pd.to_datetime(btc_df['date']).dt.to_period('M')}
)

In [None]:
# Display btc_df, which now also carries the 'month-year' period column
btc_df

In [None]:
# Lower-case the coin names in crypto_df
lowered_names = crypto_df['name'].str.lower()
crypto_df = crypto_df.assign(name=lowered_names)

In [None]:
# Derive a monthly Period column ('month_year') from the parsed 'date' column
parsed_dates = pd.to_datetime(crypto_df['date'])
crypto_df = crypto_df.assign(month_year=parsed_dates.dt.to_period('M'))

In [None]:
# Display crypto_df with lower-cased names and the added 'month_year' column
crypto_df

In [None]:
# Index crypto_df by coin name so single coins can be selected with .loc
crypto_df = crypto_df.set_index(keys='name')

In [None]:
# Select the 'bitcoin' rows (all columns) from crypto_df
btc1_df = crypto_df.loc['bitcoin', :]

In [None]:
# Display btc1_df (the 'bitcoin' rows selected from crypto_df)
btc1_df

In [None]:
# Stack the two bitcoin tables row-wise; columns present in only one of them are filled with NaN.
# NOTE(review): btc_df carries 'month-year' while btc1_df carries 'month_year', so the
# two month columns do not line up in the result — confirm this is intended.
final_df = pd.concat([btc_df, btc1_df], axis=0, join='outer')

In [None]:

# Keep only the rows of final_df that have a 'spread' value.
# An 'average' column is added to new_df further down, so take an explicit copy:
# assigning onto a boolean-mask selection raises SettingWithCopyWarning.
new_df = final_df[final_df['spread'].notna()].copy()

In [None]:
# Display the rows of final_df that have a non-null 'spread'
new_df

In [None]:
# Read in the disposable-income table
disposable_income=pd.read_csv('disposable_inc.csv')
# NOTE(review): unlike the other inputs, this path is not under 'Resources/' — confirm the file location

In [None]:
# Wrap the loaded income table in a DataFrame named income_df
income_df = pd.DataFrame(data=disposable_income)

In [None]:
# Remove the 'Unnamed: 0' column from income_df.
# errors='ignore' keeps the cell re-runnable: income_df is rebound here, so a
# second run (or a CSV without that column) would otherwise raise KeyError.
income_df = income_df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Display income_df after removing the 'Unnamed: 0' column
income_df

In [None]:
# Store the month key as text (a monthly Period renders as 'YYYY-MM') so it can
# match income_df['month_year'] in the merge below.
# Casting to 'object' would keep pandas Period objects, which never compare
# equal to values parsed from a CSV, so the merge would return no rows.
# NOTE(review): assumes income_df['month_year'] holds 'YYYY-MM' strings — confirm against disposable_inc.csv
crypto_df['month_year'] = crypto_df['month_year'].astype(str)

In [None]:
# Inner-join income_df with crypto_df (all coins, not new_df) on the month key.
# merge() ignores the index when joining on columns, and crypto_df is indexed
# by coin name, so the index is moved back into a 'name' column first;
# otherwise the merged rows would lose their coin label.
cleaned_df = income_df.merge(crypto_df.reset_index(), how='inner', on='month_year')

In [None]:
# Display the result of merging income_df with crypto_df
cleaned_df

In [None]:
# Inspect crypto_df column dtypes (month_year was recast a few cells above)
crypto_df.dtypes

In [None]:
# List the distinct values in the 'Date' column of the merged frame
cleaned_df['Date'].unique()

In [None]:
# List the distinct values in the 'Income' column of the merged frame
cleaned_df['Income'].unique()

In [None]:
# Inspect income_df column dtypes
income_df.dtypes

In [None]:
# Inspect new_df column dtypes before building the model inputs
new_df.dtypes

In [None]:
# Display new_df (rows of final_df with a non-null 'spread')
new_df

In [None]:
# Add 'average': the plain mean of the open, high, low and close prices of each row.
# assign() returns a new frame, so nothing is written onto the filtered
# selection of final_df (which would raise SettingWithCopyWarning).
new_df = new_df.assign(
    average=(new_df['open'] + new_df['high'] + new_df['low'] + new_df['close']) / 4
)

In [None]:
# Display new_df, now including the 'average' column
new_df

In [None]:
# Select the model inputs (X) and the regression target (y).
# Features are listed explicitly rather than derived by dropping columns:
# dropping left the 'month_year' column (pandas Period values, inherited from
# btc1_df) in X, and LinearRegression cannot convert those to floats.
X = new_df[['volume', 'market', 'spread']]
# Target: the per-row mean of the open/high/low/close prices
y = new_df['average']

In [None]:
# Print the shapes of the feature frame and the target series as a sanity check
print(X.shape,y.shape)

In [None]:
# Instantiate an unfitted scikit-learn LinearRegression estimator with its default settings
model = LinearRegression()

In [None]:
# Fit the regression on the complete X / y.
# NOTE(review): train_test_split is imported at the top of the notebook but no
# hold-out split is made, so everything computed from this model below is in-sample.
model.fit(X,y)

In [None]:
# Print the estimator's text representation
print(model)

In [None]:
# Report the fitted parameters: one coefficient per feature column of X, then the intercept
fitted_parameters = (
    ('Weight coefficients: ', model.coef_),
    ('y-axis intercept: ', model.intercept_),
)
for label, value in fitted_parameters:
    print(label, value)

In [None]:
# Predict on the same X the model was fitted on (in-sample predictions)
predictions = model.predict(X)


In [None]:
# Tabulate each prediction next to the observed target and their signed difference
residuals = predictions - y
prediction_df = pd.DataFrame(
    {"Predicted": predictions, "Actual": y, "Error": residuals},
    columns=["Predicted", "Actual", "Error"],
)

In [None]:
# Display predicted vs. actual values and the prediction error per row
prediction_df