In [None]:
# Standard library first, then the third-party analysis/plotting stack.
import warnings

import matplotlib.pyplot as plt
import numpy as np  # numerical routines
import pandas as pd  # tabular data handling and CSV loading
import seaborn as sns

# Default figure size (width, height in inches) for the plots in this notebook.
sns.set(rc={'figure.figsize': (14, 12)})

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

In [None]:
# Read the historic Litecoin data into the frame used by the rest of the notebook.
litecoin_csv_path = '../input/litecoin-data/Litecoin historic data.csv'
litecoin_df = pd.read_csv(litecoin_csv_path)

# Bare last expression so the notebook renders the loaded frame.
litecoin_df

In [None]:
# Print the column dtypes and non-null counts of the freshly loaded frame.
litecoin_df.info()

In [None]:
# Summary statistics of the loaded frame, before any cleaning or feature engineering.
litecoin_df.describe()

## Removing 'M' and 'K' in Volume column.

In [None]:
# Multipliers for the magnitude suffix that may end a raw volume string.
VOLUME_SUFFIX_MULTIPLIERS = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}


def parse_volume(raw):
    """Convert one raw 'Vol.' entry (e.g. ``'1.25M'`` or ``'830K'``) to a float.

    Parameters
    ----------
    raw : str or number
        A volume string optionally ending in K / M / B, a plain numeric
        string, an already-numeric value, or a missing value.

    Returns
    -------
    float
        The volume as a plain number. Missing values and strings that cannot
        be parsed (such as a '-' placeholder) become NaN.
    """
    if not isinstance(raw, str):
        # Already numeric (e.g. the cell was re-run) or a missing value.
        return np.nan if pd.isna(raw) else float(raw)

    text = raw.strip().replace(',', '')
    # text[-1:] (rather than text[-1]) is safe on an empty string.
    multiplier = VOLUME_SUFFIX_MULTIPLIERS.get(text[-1:].upper())
    digits = text if multiplier is None else text[:-1]
    try:
        value = float(digits)
    except ValueError:
        return np.nan
    return value if multiplier is None else value * multiplier


# Assign the whole column at once. The previous row-by-row
# `litecoin_df["Vol."].iloc[x] = ...` was chained assignment, which pandas
# does not guarantee writes back to the frame (and never does under
# copy-on-write).
litecoin_df['Vol.'] = litecoin_df['Vol.'].map(parse_volume)
litecoin_df['Vol.']

In [None]:
# Cast the volume column to a float dtype so it can be used in numeric operations and plots.
litecoin_df['Vol.'] = litecoin_df['Vol.'].astype(float)

## Adding new features

#### New Feature - Day since started

Assumption here is that - recently started crypto would have lesser volume, lesser price( and lesser users), as compared to older ones.

In [None]:
# Number the rows in descending order of their index label, so the first row
# gets the largest value and the last row gets the smallest.
# NOTE(review): this only means "days since start" if the rows are ordered
# newest-first with one row per day — confirm against the CSV.
descending_labels = litecoin_df.index.to_list()
descending_labels.sort(reverse=True)
litecoin_df['Day_since_started'] = descending_labels

#### Changing the data type for 'Date' column

In [None]:
# Parse the 'Date' column into pandas datetimes so the `.dt` accessor can be used on it later.
litecoin_df['Date'] = pd.to_datetime(litecoin_df['Date'])

#### New Feature - Date Month Year 

- People would trade more on weekends? - when they have more time
- Year, day of month might not be relevant.

In [None]:
# Calendar components derived from the parsed 'Date' column.
date_col = litecoin_df['Date']

litecoin_df['day'] = date_col.dt.day
litecoin_df['month'] = date_col.dt.month
litecoin_df['year'] = date_col.dt.year

# `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0;
# `isocalendar().week` is its replacement. It returns a nullable UInt32
# column, so cast back to int64 to keep the dtype `.dt.week` used to produce.
# NOTE(review): the cast assumes no date is missing (NaT) — confirm.
litecoin_df['week_of_year'] = date_col.dt.isocalendar().week.astype('int64')

# pandas numbers weekdays Monday=0 ... Sunday=6.
litecoin_df['weekday'] = date_col.dt.weekday

# Weekend = Saturday (5) or Sunday (6). The previous `== (6 or 7)` reduced to
# `== 6`, because `6 or 7` evaluates to 6, so Saturdays were never flagged.
litecoin_df['weekend'] = litecoin_df['weekday'].isin([5, 6])

litecoin_df.head()

#### Let's looks at Boxplot - for outliers in the data

In [None]:
# Build a separate frame without these three columns and box-plot what is
# left; `litecoin_df` itself is not modified.
# NOTE(review): presumably they are dropped because their scale would dwarf
# the other columns — confirm.
columns_to_skip = ['Day_since_started', 'year', 'Vol.']
df = litecoin_df.drop(columns=columns_to_skip)

df.boxplot()

## Log scaling
As we can see the data is highly skewed, so lets Log scale the data.

In [None]:
# Columns that get replaced in place by log(1 + value).
# NOTE(review): this cell is not idempotent — running it twice applies the
# transform twice, so re-run the notebook from the top instead.
columns_to_log_scale = [
    'Price(in dollars)',
    'Open',
    'High',
    'Low',
    'Vol.',
    'Day_since_started',
    'year',
]

for column_name in columns_to_log_scale:
    scaled_values = np.log1p(litecoin_df[column_name])
    litecoin_df[column_name] = scaled_values

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names, so `plt.style.use('seaborn')`
# raises there. Use whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# Stacked area chart of the opening/high/low prices and the volume.
x = litecoin_df[['Open' , 'High' , 'Low' , 'Vol.']]
ax = x.plot.area()

# NOTE(review): the x axis is the frame's row index, not the
# 'Day_since_started' column, and the plotted values were log-scaled
# earlier in the notebook — confirm that these labels are what is intended.
ax.set_xlabel('Day since started', fontsize=15)
ax.set_ylabel('Price', fontsize=15)
ax.set_title('Litecoin', fontsize=17)

# Show the legend entries in reverse alphabetical order of their labels.
handles, labels = ax.get_legend_handles_labels()
labels, handles = zip(*sorted(zip(labels, handles), key=lambda pair: pair[0], reverse=True))
ax.legend(handles, labels)
plt.show()


# Visualize data

In [None]:
# Box plot of the numeric columns of the full frame, after the log scaling
# applied earlier in the notebook. (The triple-quoted block of disabled code
# that used to sit here was a no-op string literal and has been removed.)
litecoin_df.boxplot()

#### Lets look at trands between different features.

In [None]:
# Grid of pairwise scatter plots across the columns of the frame, to eyeball relationships between features.
sns.pairplot(litecoin_df )

### Looks good, but lets encode the graph using "Day since started"

In [None]:
# Same pairwise grid, with the points coloured by the 'Day_since_started' column.
# NOTE(review): that column appears to hold a distinct value per row, so the hue
# mapping and legend may be very large and slow to render — confirm this is acceptable.
sns.pairplot(litecoin_df , hue = 'Day_since_started')

#### Lets look at correlation between different features.

In [None]:
sns.set(rc={'figure.figsize': (14, 12)})

# Correlate only the numeric and boolean columns. The frame also holds the
# datetime 'Date' column, and since pandas 2.0 `DataFrame.corr()` no longer
# drops non-numeric columns silently; it raises instead.
numeric_features = litecoin_df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_features.corr().round(2), annot=True)

# New Version

## New Features to add - 
- Year end
- Year start
- Festival season
- Month end
- Quarted end

# Year start and end

In [None]:
# Flag rows that fall in December, January or February.
# The previous `== (12 or 1 or 2)` reduced to `== 12`, because a chain of
# `or` returns its first truthy operand, so January and February were missed.
litecoin_df['end_or_start_of_year'] = litecoin_df['month'].isin([12, 1, 2])

# Preview the flagged rows. The previous filter used 'quarter_end_month',
# which is only created in a later cell, so it raised KeyError on a fresh run.
litecoin_df[litecoin_df['end_or_start_of_year']].head(10)

# Quarter end - Months 

In [None]:
# Flag rows whose month closes a calendar quarter (March, June, September, December).
# The previous `== (3 or 6 or 9 or 12)` reduced to `== 3`, because a chain of
# `or` returns its first truthy operand, so only March was ever flagged.
litecoin_df['quarter_end_month'] = litecoin_df['month'].isin([3, 6, 9, 12])
litecoin_df[litecoin_df['quarter_end_month']]

# Quarter end - Last few Days

In [None]:
# Flag the last days (day-of-month above 24) of a quarter-closing month.
# The previous `== (3 or 6 or 9 or 12)` reduced to `== 3`, so only late March
# was flagged. The two conditions are combined with `&` (element-wise AND)
# rather than `*`.
in_quarter_end_month = litecoin_df['month'].isin([3, 6, 9, 12])
in_last_days_of_month = litecoin_df['day'] > 24
litecoin_df['quarter_end_week'] = in_quarter_end_month & in_last_days_of_month
litecoin_df[litecoin_df['quarter_end_week']].head(10)

In [None]:
sns.set(rc={'figure.figsize': (14, 12)})

# Correlation heatmap including the newly added flag columns. Only numeric
# and boolean columns are correlated: the frame also holds the datetime 'Date'
# column, and since pandas 2.0 `DataFrame.corr()` raises on non-numeric
# columns instead of dropping them.
numeric_features = litecoin_df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_features.corr().round(2), annot=True)

# New features to try in next version

- Price higher than last month
- Price lower than last month
- Price higher than last week
- Price lower than last week
- Price higher than last quarter
- Price lower than last quarter

------

- Price 10 % higher than last month
- Price 10 % lower than last month
- Price 20 % higher than last month
- Price 20 % lower than last month
- Price 20 % higher than last quarter
- Price 20 % lower than last quarter

## Comparison to other Cryptos

Mainly - Bitcoin, Ethereum

In [None]:
# Load the historic data of two other coins for the comparison.
cardano_csv_path = '../input/cardano-data/Cardano historic data.csv'
binance_csv_path = '../input/binance-coin-data/Binance Coin - Historic data.csv'

cardano_df = pd.read_csv(cardano_csv_path)
binance_df = pd.read_csv(binance_csv_path)