In [113]:
from scipy import stats
import pandas as pd
import numpy as np
from lmfit import models
import matplotlib.pyplot as plt

In [114]:
# Read ./AAPL.csv into a DataFrame and display its (rows, columns) shape.
df = pd.read_csv(filepath_or_buffer="./AAPL.csv")
df.shape

(1259, 7)

In [115]:
# Keep only the date and the closing price, exposing the close as 'Price_On_Date'.
df = df[['Date', 'Close']].rename(columns={'Close': 'Price_On_Date'})

In [116]:
# Preview the first five rows after the column selection.
df.head(n=5)

Unnamed: 0,Date,Price_On_Date
0,7/1/2014,93.519997
1,7/2/2014,93.480003
2,7/3/2014,94.029999
3,7/7/2014,95.970001
4,7/8/2014,95.349998


In [117]:
# Pair each row with the price from the following row; the final row has no
# successor, so its 'Price_Next_Day' is NaN.
df['Price_Next_Day'] = df['Price_On_Date'].shift(periods=-1)

In [118]:
# Preview the first five rows, now including 'Price_Next_Day'.
df.head(n=5)

Unnamed: 0,Date,Price_On_Date,Price_Next_Day
0,7/1/2014,93.519997,93.480003
1,7/2/2014,93.480003,94.029999
2,7/3/2014,94.029999,95.970001
3,7/7/2014,95.970001,95.349998
4,7/8/2014,95.349998,95.389999


In [119]:
# 'Daily_Return' is the ratio next-row price / this-row price, so a value above 1
# means the price rose and a value below 1 means it fell.
df['Daily_Return'] = df['Price_Next_Day'].div(df['Price_On_Date'])
df.head(n=5)

Unnamed: 0,Date,Price_On_Date,Price_Next_Day,Daily_Return
0,7/1/2014,93.519997,93.480003,0.999572
1,7/2/2014,93.480003,94.029999,1.005884
2,7/3/2014,94.029999,95.970001,1.020632
3,7/7/2014,95.970001,95.349998,0.99354
4,7/8/2014,95.349998,95.389999,1.00042


In [120]:
# Drop the rows for which no return could be computed. `shift(-1)` always leaves
# the final row without a next-day price (NaN 'Daily_Return'), so on the data shown
# here this removes exactly that last row, as the positional `iloc[:-1]` did.
# Selecting by missing value instead of by position keeps the cell idempotent:
# re-running it does not discard another valid row each time.
# `.copy()` gives `df` its own data, so the columns added in later cells are written
# to an independent frame rather than to a slice of the previous one.
df = df.dropna(subset=['Daily_Return']).copy()
df.shape

(1258, 4)

In [121]:
# Report the mean and standard deviation of the return ratio, both computed with
# NumPy's default arguments.
daily_return = df['Daily_Return']
print(f"mean:{np.mean(daily_return)},standard deviation:{np.std(daily_return)}")

mean:1.0007311411047328,standard deviation:0.015512461270180765


In [122]:
# 10th and 90th percentiles of the return ratio; the labelling cell further down
# compares each row's 'Daily_Return' against these two limits.
lower_limit = np.quantile(df['Daily_Return'], q=0.1)
upper_limit = np.quantile(df['Daily_Return'], q=0.9)

In [123]:
# Show the quantile cut-offs used for labelling the news dates.
print(
    f"Lower Limit: {lower_limit}\nUpper Limit: {upper_limit}"
)

Lower Limit: 0.98289967847269
Upper Limit: 1.0171308930811191


In [124]:
# Give every row a placeholder label. The following cell overwrites it with
# 'Hold', 'Sell' or 'Buy' wherever 'Daily_Return' compares against the limits;
# a row whose return is NaN matches none of those comparisons and keeps 'No Action'.
df['Action_for_News_Date']='No Action'

In [125]:
# Label each date from its return ratio:
#   strictly between the limits -> 'Hold'
#   at or below the lower limit -> 'Sell'
#   at or above the upper limit -> 'Buy'
# The masks are applied in this order, so a later label overwrites an earlier one
# if a row ever satisfies more than one condition.
daily_return = df['Daily_Return']
label_masks = {
    'Hold': daily_return.gt(lower_limit) & daily_return.lt(upper_limit),
    'Sell': daily_return.le(lower_limit),
    'Buy': daily_return.ge(upper_limit),
}
for label, mask in label_masks.items():
    df.loc[mask, 'Action_for_News_Date'] = label


In [126]:
# Count the non-null dates that fall under each label.
df.groupby(by='Action_for_News_Date')['Date'].agg('count')

Action_for_News_Date
Buy      126
Hold    1006
Sell     126
Name: Date, dtype: int64

In [127]:
# Keep only the date and its label for export.
df_json = df.loc[:, ['Date', 'Action_for_News_Date']]

In [128]:
# Write the date/label table to ./Date_Labels.json using pandas' default JSON layout.
df_json.to_json(path_or_buf="./Date_Labels.json")