In [2]:
import pandas as pd
import numpy as np
from vnstock import *
from datetime import *
import os
from datetime import datetime, timedelta

In [3]:
# Path to the directory containing the CSV files
folder_path = 'C:/Users/phong/OneDrive/Desktop/usstock/price'

# List all files in the directory
files = os.listdir(folder_path)

# Filter out the CSV files
csv_files = [file for file in files if file.endswith('.csv')]

# Initialize an empty list to hold DataFrames
df_list = []

# Loop through the CSV files and read them into DataFrames
for csv_file in csv_files:
    file_path = os.path.join(folder_path, csv_file)
    df = pd.read_csv(file_path)
    df_list.append(df)

# Concatenate all DataFrames into a single DataFrame
df = pd.concat(df_list, ignore_index=True)

In [4]:
df = df[['Date','Close','ticker']]

In [5]:
df.rename(columns={'Date': 'time', 'Close': 'close'}, inplace=True)

In [6]:
df['time'] = pd.to_datetime(df['time'])
df['month'] = df['time'].dt.month
df['year'] = df['time'].dt.year
df['yearmonth'] = df['time'].dt.strftime('%Y%m')

In [7]:
# Assuming 'time' column is in datetime format
# Sort the DataFrame by 'ticker' and 'time' for proper calculation
df.sort_values(by=['ticker', 'time'], inplace=True)

# Group by ticker and use shift to get the next month's close price
df['next month price'] = df.groupby('ticker')['close'].shift(-1)
# Group by ticker and use shift to get the next month's close price
df['last month price'] = df.groupby('ticker')['close'].shift(1)

In [8]:
import pandas as pd

# Assuming 'time' column is in datetime format
# Sort the DataFrame by 'ticker' and 'time' for proper calculation
df.sort_values(by=['ticker', 'time'], inplace=True)

# Calculate buying return for each ticker and each time
df['return'] = df['next month price'] / df['close'] - 1


In [9]:
df.head()

Unnamed: 0,time,close,ticker,month,year,yearmonth,next month price,last month price,return
23090263,2014-05-16,10.06,True,5,2014,201405,9.9,,-0.015905
23090264,2014-05-19,9.9,True,5,2014,201405,9.5,10.06,-0.040404
23090265,2014-05-20,9.5,True,5,2014,201405,9.625,9.9,0.013158
23090266,2014-05-21,9.625,True,5,2014,201405,9.31,9.5,-0.032727
23090267,2014-05-22,9.31,True,5,2014,201405,9.22,9.625,-0.009667


In [10]:
df['yearmonth'] = pd.to_datetime(df['yearmonth'], format='%Y%m')

# Group by yearmonth and rank returns within each group
df['ranking'] = df.groupby('yearmonth')['return'].rank(ascending=False)

In [11]:
con = df['ticker'] == 'A'
df[con]

Unnamed: 0,time,close,ticker,month,year,yearmonth,next month price,last month price,return,ranking
0,1999-11-18,31.473534,A,11,1999,1999-11-01,28.880545,,-0.082386,37180.0
1,1999-11-19,28.880545,A,11,1999,1999-11-01,31.473534,31.473534,0.089783,1034.0
2,1999-11-22,31.473534,A,11,1999,1999-11-01,28.612303,28.880545,-0.090909,37331.0
3,1999-11-23,28.612303,A,11,1999,1999-11-01,29.372318,31.473534,0.026563,5409.0
4,1999-11-24,29.372318,A,11,1999,1999-11-01,29.461731,28.612303,0.003044,14339.0


In [13]:
df = df.sort_values(by=['ticker', 'time'])

# Group by ticker and calculate the change
df['change'] = df.groupby('ticker')['close'].transform(lambda x: (x > x.shift(1)).astype(int))
df['change'] = df['change'].apply(lambda x: 'A' if x == 1 else 'B')
df.head()

Unnamed: 0,time,close,ticker,month,year,yearmonth,next month price,last month price,return,ranking,change
23090263,2014-05-16,10.06,True,5,2014,2014-05-01,9.9,,-0.015905,60419.5,B
23090264,2014-05-19,9.9,True,5,2014,2014-05-01,9.5,10.06,-0.040404,68001.0,B
23090265,2014-05-20,9.5,True,5,2014,2014-05-01,9.625,9.9,0.013158,11833.5,B
23090266,2014-05-21,9.625,True,5,2014,2014-05-01,9.31,9.5,-0.032727,66858.0,A
23090267,2014-05-22,9.31,True,5,2014,2014-05-01,9.22,9.625,-0.009667,55015.0,B


In [14]:
import pandas as pd

# Assuming df is your DataFrame
# Convert 'time' column to datetime if it's not already
df['time'] = pd.to_datetime(df['time'])

# Sort DataFrame by 'time'
df = df.sort_values(by='time')

# Group by 'ticker' and shift 'change' column to create columns for previous months
df['d-1'] = df.groupby('ticker')['change'].shift(1)
df['d-2'] = df.groupby('ticker')['change'].shift(2)
df['d-3'] = df.groupby('ticker')['change'].shift(3)
df['d-4'] = df.groupby('ticker')['change'].shift(4)
df['d-5'] = df.groupby('ticker')['change'].shift(5)
df['d-6'] = df.groupby('ticker')['change'].shift(6)


In [15]:
# Concatenate values from 'd-6' to 'd-1' along rows after filtering out 0 and "." and create a new column 'hexagram'
df['hexagram'] = df[['d-6', 'd-5', 'd-4', 'd-3', 'd-2', 'd-1']].replace({0:'', '.':''}).astype(str).agg(''.join, axis=1)


In [16]:
df['flag'] = df['hexagram'].apply(lambda x: 0 if 'nan' in x else 1)

In [20]:
con = df['flag'] ==1
df1 = df[con]

result = df1.groupby(['hexagram', 'year', 'month']).agg({'return': 'mean'}).reset_index()

result.rename(columns={'return': 'Average of return'}, inplace=True)

In [22]:
# write df to csv file and open it.
csv_file_path = 'C:/Users/phong/OneDrive/Desktop/usstock/test_20240519_916.csv'
result.to_csv(csv_file_path, index=False)
# Open the CSV file
os.startfile(csv_file_path)

In [23]:
df

Unnamed: 0,time,close,ticker,month,year,yearmonth,next month price,last month price,return,ranking,change,d-1,d-2,d-3,d-4,d-5,d-6,hexagram,flag
8082447,1975-01-02,13.875000,ETR,1,1975,1975-01-01,13.625000,,-0.018018,3345.0,B,,,,,,,nannannannannannan,0
2013939,1975-01-02,8.500000,AVA,1,1975,1975-01-01,8.937500,,0.051471,298.0,B,,,,,,,nannannannannannan,0
14222022,1975-01-02,0.731481,MCD,1,1975,1975-01-01,0.731481,,0.000000,2266.0,B,,,,,,,nannannannannannan,0
15230787,1975-01-02,7.375000,MSB,1,1975,1975-01-01,7.250000,,-0.016949,3314.0,B,,,,,,,nannannannannannan,0
10331367,1975-01-02,6.687500,GT,1,1975,1975-01-01,6.937500,,0.037383,500.0,B,,,,,,,nannannannannannan,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16219917,2024-05-17,9.390000,NMI,5,2024,2024-05-01,,9.360,,,A,B,A,B,B,B,A,ABBBAB,1
20516939,2024-05-17,4.840000,SELF,5,2024,2024-05-01,,4.970,,,B,B,B,A,B,B,B,BBBABB,1
20517415,2024-05-17,1.930000,SELX,5,2024,2024-05-01,,1.968,,,B,A,A,B,A,B,A,ABABAA,1
20521895,2024-05-17,15.740000,SEMR,5,2024,2024-05-01,,15.490,,,A,B,A,B,A,B,B,BBABAB,1
