# Data Visualization Task
This notebook performs exploratory data analysis (EDA) on data collected from three topic sources: weather, (fake) registered users, and cryptocurrency prices (BTC & ETH). Make sure the pipeline has been running long enough to collect sufficient data for meaningful visualizations.

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from cassandrautils import *

In [None]:
# Load one DataFrame per topic using the helpers star-imported from cassandrautils.
# NOTE(review): presumably each helper reads the matching Cassandra table — confirm in cassandrautils.
faker = getFakerDF()
binance = getBinanceDF()
weather = getWeatherDF()

In [None]:
# Display the weather DataFrame as loaded (before any unit conversion)
weather

In [None]:
# Display the Binance price DataFrame as loaded
binance

In [None]:
# Display the (fake) registered-users DataFrame as loaded
faker

## Weather Analysis

In [None]:
# Inspect the dtype of every weather column
weather.dtypes

In [None]:
# Convert the temperature columns from Kelvin to Celsius.
#
# The conversion overwrites `weather` in place, so running this cell twice
# would subtract the offset twice and silently corrupt every later plot.
# A marker stored in `weather.attrs` records that the conversion has already
# been applied, which makes the cell safe to re-run. Reloading `weather`
# from the source produces a fresh frame without the marker.
KELVIN_OFFSET = 273.15
temp_cols = ['feels_like', 'temp', 'temp_max', 'temp_min']

if not weather.attrs.get('temps_in_celsius', False):
    weather[temp_cols] = weather[temp_cols] - KELVIN_OFFSET
    weather.attrs['temps_in_celsius'] = True

weather

In [None]:
# Temperature over time, one line per location
plt.figure(figsize=(15, 5))
ax = sns.lineplot(x="forecastdate", y="temp", hue="location", data=weather)
ax.set_xlabel("Time")
ax.set_ylabel("Temperature (C)")
ax.set_title("Temperature among cities")

In [None]:
# "Feels like" temperature over time, one line per location
plt.figure(figsize=(15, 5))
ax = sns.lineplot(x="forecastdate", y="feels_like", hue="location", data=weather)
ax.set_xlabel("Time")
ax.set_ylabel("Temperature (C)")
ax.set_title("Feel Like Temperature among cities")

In [None]:
# Wind over time, one line per location
plt.figure(figsize=(15, 5))
ax = sns.lineplot(x="forecastdate", y="wind", hue="location", data=weather)
ax.set_xlabel("Time")
ax.set_ylabel("Wind Speed")
ax.set_title("Wind among cities")

## Registered Users Analysis

In [None]:
# Derive an `age` column from each user's `year` value
import datetime

# Year of today's date, used as the reference point for the age calculation
current_year = datetime.date.today().year

# NOTE(review): the `+ 1` counts the current year inclusively — confirm this is the intended age definition
faker['age'] = faker['year'].apply(lambda year: current_year - year + 1)
faker.head()

In [None]:
# Count the occurrences of each car color

plt.figure(figsize=(15, 5))
sns.countplot(data=faker, x="safe_color_name")

In [None]:
# Summary statistics for the faker DataFrame; read the `age` column of the
# output for the age distribution (present once the `age` column has been derived)
faker.describe()

In [None]:
# Box plot of the derived `age` column
sns.boxplot(data=faker, x="age")

## BTC and ETH Analysis

In [None]:
# Split the Binance rows into one frame per trading pair
btc_df = binance.loc[binance["pair"].eq("BTCUSDT")]
eth_df = binance.loc[binance["pair"].eq("ETHUSDT")]

btc_df.head()

In [None]:
# BTC close price over time
plt.figure(figsize=(15, 5))
ax = sns.lineplot(x="datetime", y="close_price", data=btc_df)
ax.set_xlabel("Time")
ax.set_ylabel("USD")
ax.set_title("BTC Price - 1m interval")

In [None]:
# ETH close price over time
plt.figure(figsize=(15, 5))
ax = sns.lineplot(x="datetime", y="close_price", data=eth_df)
ax.set_xlabel("Time")
ax.set_ylabel("USD")
ax.set_title("ETH Price - 1m interval")

In [None]:
# One-time setup: uncomment and run the line below if prophet is not installed yet

# %pip install prophet

In [None]:
# Use Prophet to forecast the time-series price values.
# Prophet takes a two-column input frame: `ds` (timestamp) and `y` (value to forecast).
from prophet import Prophet

btc_prophet = btc_df[['datetime', 'close_price']].rename(
    columns={'datetime': 'ds', 'close_price': 'y'}
)
eth_prophet = eth_df[['datetime', 'close_price']].rename(
    columns={'datetime': 'ds', 'close_price': 'y'}
)
btc_prophet

In [None]:
# Fit a default Prophet model on the BTC close prices
mBTC = Prophet().fit(btc_prophet)

# Extend the timeline by 60 one-minute steps, then predict over it
future = mBTC.make_future_dataframe(freq='1min', periods=60)
fcst = mBTC.predict(future)

# Plot the forecast produced by the model
fig = mBTC.plot(fcst)

In [None]:
# Fit a default Prophet model on the ETH close prices
mETH = Prophet().fit(eth_prophet)

# Extend the timeline by 60 one-minute steps, then predict over it
future = mETH.make_future_dataframe(freq='1min', periods=60)
fcst = mETH.predict(future)

# Plot the forecast produced by the model
fig = mETH.plot(fcst)

From the model, we can observe that too few data points have been collected so far for the model to predict the future outcome within a narrower range.