In [4]:
!python3 -m pip install --user colorama
!python3 -m pip install --user mplcursors
!python3 -m pip install --user cufflinks plotly chart_studio

You should consider upgrading via the 'pip install --upgrade pip' command.[0m
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
# %%writefile data_provider.py
import numpy as np
import pandas as pd
from khayyam import JalaliDate, JalaliDatetime
import pystore
from tqdm import tqdm, trange


def add_diff_min_max(df):
    """Add a ``diff_min_max`` column to ``df`` in place.

    The new column is the ``max`` - ``min`` spread expressed as a percentage
    of ``min``. Nothing is returned.
    """
    low = df['min']
    spread = df['max'] - low
    df.loc[:, "diff_min_max"] = spread * 100 / low

def add_diff_ending(df):
    """Add a ``diff_open`` column to ``df`` in place.

    The new column is ``lastday`` - ``ending`` expressed as a percentage of
    ``lastday``. Nothing is returned.
    """
    previous = df['lastday']
    change = previous - df['ending']
    df.loc[:, "diff_open"] = change * 100 / previous

def add_adjust_scale(df_symbol):
    """Add an ``adj_scale`` column to a single-symbol frame, in place.

    For every row after the first, ``adj_scale`` is that row's ``lastday``
    divided by the previous row's ``ending``; the first row gets 1.

    Rows are paired by position (``shift``) rather than by dropping index
    labels, so repeated index labels no longer break the pairing, and the
    frame is never indexed with ``index[0]``, which used to raise
    ``IndexError`` on an empty frame.
    """
    previous_ending = df_symbol["ending"].shift(1)
    scale = df_symbol["lastday"] / previous_ending
    if len(scale):
        # The first row has no predecessor, so it gets the neutral factor.
        scale.iloc[0] = 1
    # Assign by position so duplicate index labels cannot break alignment.
    df_symbol.loc[:, "adj_scale"] = scale.to_numpy()
    
def add_adjust(df):
    """Create the ``adj_*`` price columns and back-adjust them, in place.

    Each ``adj_*`` column starts as a copy of its raw counterpart. Then, for
    every row whose ``adj_scale`` is below 1, all ``adj_*`` values from the
    first row up to and including that row are multiplied by that factor.

    NOTE(review): the ``.loc`` slice is inclusive, so the adjustment row
    itself is rescaled too -- confirm this is intended.
    """
    adjustment_dates = df.loc[df["adj_scale"] < 1].index
    # Seed the adjusted columns from the raw prices (same creation order).
    for raw_name in ("open", "close", "ending", "min", "max"):
        df.loc[:, "adj_" + raw_name] = df[raw_name]
    adj_headers = ["adj_min", "adj_max", "adj_close", "adj_open", "adj_ending"]
    for adjustment_date in adjustment_dates:
        factor = df.loc[adjustment_date, "adj_scale"]
        window = df.loc[df.index[0]:adjustment_date, adj_headers]
        df.loc[df.index[0]:adjustment_date, adj_headers] = window.transform(
            lambda column: column * factor
        )

def add_log_adj(df):
    """Add a base-10 logarithm column for each adjusted price column, in place.

    For each of ``adj_open``, ``adj_close``, ``adj_ending``, ``adj_min`` and
    ``adj_max`` a matching ``log_adj_*`` column is written. Only those five
    columns are read; ``adj_scale`` is not required.
    """
    for name in ("open", "close", "ending", "min", "max"):
        df.loc[:, f"log_adj_{name}"] = np.log10(df[f"adj_{name}"])

def adjust_and_log(df):
    """Run the whole adjustment pipeline on ``df`` and return it.

    The stages run in order: ``add_adjust_scale``, ``add_adjust``,
    ``add_log_adj``. Each mutates ``df`` in place; the same object is
    returned for convenience.
    """
    for stage in (add_adjust_scale, add_adjust, add_log_adj):
        stage(df)
    return df
    
class DataModel:
    """In-memory store of per-symbol daily price rows read from CSV files.

    ``read`` concatenates the configured CSV files into ``self.df`` (indexed
    by ``date``). ``get`` returns the rows of one symbol with the ``adj_*``
    and ``log_adj_*`` columns filled in, computing them on first request.
    The frame can also be written to / restored from a pystore collection.
    """

    # NOTE(review): "خپارس" appears twice in this list; it is left untouched
    # because callers index into the list by position.
    TA_SYMBOLS = ["خپارس", "خكاوه", "فاسمين", "شبريز", "ونوين", "كنور", "ثشرق", "كاما", "ورنا", "خمحركه", "دامين",
                  "خاور", "خپارس", "خودرو", "فجام", "وبصادر"]

    # Columns produced by ``adjust_and_log``; pre-created so that adjusted
    # values can be written back into ``self.df``.
    __DERIVED_HEADERS = ["adj_scale",
                         "adj_min", "adj_max", "adj_close", "adj_open", "adj_ending",
                         "log_adj_open", "log_adj_close", "log_adj_ending", "log_adj_min", "log_adj_max"]

    def __init__(self, data_location, file_names=None, pystore_path='/home/nimac/.pystore'):
        """Remember where the CSV files live and point pystore at its storage path.

        Args:
            data_location: directory that contains the CSV files.
            file_names: CSV file names inside ``data_location``; ``None``
                (the default) means no files.
            pystore_path: directory handed to ``pystore.set_path``.
        """
        self.data_location = data_location
        # A fresh list per instance: a ``[]`` default would be shared.
        self.file_names = [] if file_names is None else file_names
        pystore.set_path(pystore_path)
        # symbol -> True once its adjusted columns are stored in self.df
        self.__is_scaled = {}

    def __read_csv(self, file_name):
        """Read one CSV file from ``data_location``, parsing the ``date`` column."""
        return pd.read_csv(f'{self.data_location}/{file_name}', sep=',', header=[0],
                           parse_dates=["date"])

    def __adjust_symbol(self, symbol):
        """Compute the adjusted columns for ``symbol``, store them, and return them.

        Returns the adjusted single-symbol frame (a copy, not a view of
        ``self.df``).
        """
        mask = self.df["symbol"] == symbol
        # Work on an explicit copy: mutating the ``.loc`` slice directly is
        # what triggers pandas' SettingWithCopyWarning.
        adjusted = adjust_and_log(self.df.loc[mask].copy())
        # A frame that did not go through ``initialize`` may lack some of the
        # derived columns; create them so the write-back keeps every value.
        for column in adjusted.columns:
            if column not in self.df.columns:
                self.df[column] = np.nan
        self.df.loc[mask] = adjusted
        self.__is_scaled[symbol] = True
        return adjusted

    @staticmethod
    def __to_gregorian(jalali_text):
        """Convert a ``"year-month-day"`` Jalali string to a Gregorian ``date``."""
        parts = jalali_text.split("-")
        return JalaliDate(int(parts[0]), int(parts[1]), int(parts[2])).todate()

    def adjust_all(self):
        """Compute and store the adjusted columns for every known symbol."""
        for i in trange(len(self.symbols)):
            self.__adjust_symbol(self.symbols[i])

    def initialize(self):
        """Add the percentage columns, index by ``date`` and pre-create derived columns."""
        add_diff_min_max(self.df)
        add_diff_ending(self.df)
        self.df = self.df.set_index('date')
        self.symbols = self.df["symbol"].unique()
        for header in self.__DERIVED_HEADERS:
            self.df[header] = np.nan

    def read(self):
        """Load every configured CSV file into ``self.df`` and initialize it."""
        dfs = [self.__read_csv(name) for name in self.file_names]
        self.df = pd.concat(dfs, ignore_index=True)
        self.initialize()

    def store_in_pystore(self, store_name='tradion_store', collection_name='boors',
                         item_name='ALL'):
        """Write ``self.df`` to a pystore item, overwriting any existing one."""
        self.store = pystore.store(store_name)
        self.collection = self.store.collection(collection_name)
        self.collection.write(item_name, self.df, metadata={'source': 'tsetmc'}, overwrite=True)

    def restore_from_pystore(self, store_name='tradion_store', collection_name='boors',
                             item_name='ALL'):
        """Replace ``self.df`` with the frame stored in the given pystore item."""
        self.store = pystore.store(store_name)
        self.collection = self.store.collection(collection_name)
        self.item = self.collection.item(item_name)
        self.df = self.item.to_pandas()
        if "symbol" in self.df.columns:
            self.symbols = self.df["symbol"].unique()
        # Flags refer to the previous frame; adjustments are recomputed on demand.
        self.__is_scaled = {}

    def get(self, symbol, start="", end=""):
        """Return the rows of ``symbol`` between ``start`` and ``end`` (inclusive).

        Args:
            symbol: value to match in the ``symbol`` column.
            start: Jalali date as ``"year-month-day"``; ``""`` means the
                first index label of the whole frame.
            end: Jalali date as ``"year-month-day"``; ``""`` means the last
                index label of the whole frame.

        Returns:
            A copy of the selected rows, including the ``adj_scale``,
            ``adj_*`` and ``log_adj_*`` columns. They are computed the first
            time a symbol is requested and read from ``self.df`` afterwards.
        """
        start = self.df.index[0] if start == "" else self.__to_gregorian(start)
        end = self.df.index[-1] if end == "" else self.__to_gregorian(end)
        if self.__is_scaled.get(symbol, False):
            symbol_df = self.df.loc[self.df["symbol"] == symbol]
        else:
            symbol_df = self.__adjust_symbol(symbol)
        # Hand out a copy so callers can modify the result freely.
        return symbol_df.loc[start:end].copy()

    def check_contains_name(self, symbol):
        """Return the rows whose ``symbol`` column contains ``symbol``.

        ``symbol`` is passed to ``Series.str.contains`` unchanged; rows with
        a missing symbol are excluded.
        """
        return self.df.loc[self.df["symbol"].str.contains(symbol) == True]

    def get_overal_corr(self, symbols):
        """Return the correlation matrix of ``log_adj_ending`` across ``symbols``.

        Each symbol contributes one column named ``<symbol>_log_adj_ending``.
        """
        df_corr = pd.DataFrame()
        for symbol in symbols:
            df_corr[f'{symbol}_log_adj_ending'] = self.get(symbol)["log_adj_ending"]
        return df_corr.corr()

In [3]:
# Build the data model over the two master CSV exports and load them.
dm = DataModel(
    data_location="../../xcels",
    file_names=["master0.csv", "master1.csv"],
)
dm.read()

In [14]:
# Use the first entry of DataModel.TA_SYMBOLS and load its rows from the
# Jalali date 1380-2-19 onward (no end date given).
symbol = dm.TA_SYMBOLS[0]
print(symbol)
date = "1380-2-19"
useful_column = ["lastday", "close", 'ending', 'year', 'month', 'day', 'adj_close', 'adj_ending']
df = dm.get(symbol, date)

خپارس


In [13]:
# Importing cufflinks is what attaches the .iplot() method to pandas objects;
# go_offline() makes it render inside the notebook.
import cufflinks
from plotly.offline import iplot, init_notebook_mode

init_notebook_mode(connected=True)
cufflinks.go_offline(connected=True)

# Keep the rows whose `year` lies strictly between 1380 and 1390.
df_sel = df[(df.year > 1380) & (df.year < 1390)]
df_sel["adj_close"].iplot()

In [49]:
# Load one symbol over a Jalali date window and list the rows whose
# adj_scale is below 1, printing each date in the Jalali calendar.
symbol = "خساپا"
df = dm.get(symbol, start="1385-1-1", end="1398-12-26")
abnormal = df[df["adj_scale"].lt(1)]
for adj_date in abnormal.index:
    print(JalaliDate(adj_date), abnormal.loc[adj_date, "adj_scale"])
# Rebuild the adj_* columns using only the adjustments inside this window.
add_adjust(df)

df[["close", "open", "ending", "lastday"]]

Unnamed: 0_level_0,symbol,name,amount,volume,value,lastday,open,close,last-change,last-percent,...,adj_open,adj_close,adj_ending,adj_min,adj_max,log_adj_open,log_adj_close,log_adj_ending,log_adj_min,log_adj_max
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-07-20,خساپا,سايپا,12129.0,364203156.0,570068900000.0,1526.0,1510.0,1499.0,-27.0,-1.77,...,1510.0,1499.0,1565.0,1490.0,1594.0,3.178977,3.175802,3.194514,3.173186,3.202488
2019-07-24,خساپا,سايپا,2466.0,41511494.0,63696560000.0,1565.0,1545.0,1520.0,-45.0,-2.88,...,1545.0,1520.0,1534.0,1520.0,1558.0,3.188928,3.181844,3.185825,3.181844,3.192567


In [1]:
# Same workflow, but through the data_provider module instead of the
# in-notebook definitions.
import data_provider as dp

dm = dp.DataModel(
    data_location="../../xcels",
    file_names=["master0.csv", "master1.csv"],
)
dm.read()
symbol = "فولاد"
df = dm.get(symbol, start="1394-4-23", end="1394-6-18")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
