In [4]:
### 月份效应
Month-of-the-Year Effect实证研究发现，在大多数的证券市场中存在某个或某些特定月份的平均收益率年复一年显著地异于其他各月平均收益率的现象，这种市场异象被称作“月份效应”。  
在美国的股票市场表现为“1月效应”，即1月份的平均收益率显著高于其他月份的平均收益。该现象最早由美国学者瓦切尔(Wachte1)于1942年发现，但直到1976年罗兹弗(Rozef)和金乃尔(Kinney)系统地将这一异象揭示出来，“1月效应”才逐渐进入现代金融学者的视野，并逐渐形成一套科学严谨的“月份效应”研究体系，包括“1月效应”主要体现在小规模公司的股票上，并相应地提出了“税减假说”等理论解释。  

张兵给出了基于资金面季节性流动规律的简要解释，总结起来有两方面：第一，资金面的季节变化，我国股市年末往往面临着各种形式的资金抽回，而来年初这些资金又会回流，这一资金运动规律决定了股市的“春涨”、“冬藏”；第二，重大利好政策往在2、3月份发布，从而引发“春涨”现象 。


SyntaxError: invalid character '，' (U+FF0C) (4176073160.py, line 2)

In [5]:
#先引入后面可能用到的包（package）
import pandas as pd  
import numpy as np
import yfinance as yf
from scipy import stats
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline   

#正常显示画图时出现的中文和负号
from pylab import mpl
mpl.rcParams['font.sans-serif'] = ['Arial Unicode Ms']
mpl.rcParams['axes.unicode_minus'] = False

In [6]:
# Get data function
def get_data(ticker, start_date, end_date):
    df = yf.download(ticker, start=start_date, end=end_date)
    print(df.head())  # Print the first few rows to check the data
    return df

# Calculate daily log returns
def log_ret(df):
    if 'Adj Close' in df.columns:
        logret = np.log(df['Adj Close'] / df['Adj Close'].shift(1))[1:]
    elif 'Close' in df.columns:
        logret = np.log(df['Close'] / df['Close'].shift(1))[1:]
    else:
        raise ValueError("DataFrame does not contain 'Adj Close' or 'Close' column.")
    return logret

# Convert daily returns to monthly returns
def month_rate(logret):
    date = []
    d = list(logret.index)
    for i in range(len(logret)):
        t = ''.join([d[i].strftime("%Y"), d[i].strftime("%m"), "01"])
        date.append(datetime.strptime(t, "%Y%m%d"))
    y = pd.DataFrame(logret.values, index=date, columns=['月收益率'])
    ret_M = y.groupby(y.index).sum()
    return ret_M


# Convert daily returns to annual returns
def annual_rate(logret):
    year = []
    d = list(logret.index)
    for i in range(len(logret)):
        year.append(d[i].strftime("%Y"))
    y = pd.DataFrame(logret.values, index=year, columns=['年收益率'])
    ret_Y = np.exp(y.groupby(y.index).sum()) - 1
    return ret_Y


In [7]:
# Fetch data
df = get_data('^TWII', '1993-01-01', '2024-08-26')

# Calculate log returns
logret = log_ret(df)

# Calculate monthly returns
ret_M = month_rate(logret)

# Calculate annual returns
ret_Y = annual_rate(logret)

[*********************100%%**********************]  1 of 1 completed

                   Open         High          Low        Close    Adj Close  \
Date                                                                          
1997-07-02  9094.269531  9124.299805  8988.129883  8996.719727  8996.678711   
1997-07-03  9061.599609  9061.599609  8997.509766  9027.629883  9027.588867   
1997-07-04  9144.959961  9218.209961  9119.250000  9192.570312  9192.528320   
1997-07-07  9365.190430  9370.410156  9304.820312  9322.839844  9322.797852   
1997-07-08  9345.549805  9371.500000  9282.099609  9305.269531  9305.227539   

            Volume  
Date                
1997-07-02       0  
1997-07-03       0  
1997-07-04       0  
1997-07-07       0  
1997-07-08       0  





### 查看K线图
使用pyecharts画狂拽酷炫的K线图（cmd上先安装：pip install pyecharts）

In [17]:
from pyecharts.charts import Line
from pyecharts import options as opts

# Create sample data
x_data = ['2024-01-01', '2024-01-02', '2024-01-03']
y_data = [100, 200, 300]

# Create a simple line chart
line_chart = (
    Line()
    .add_xaxis(x_data)
    .add_yaxis("Sample Line", y_data)
    .set_global_opts(title_opts=opts.TitleOpts(title="Simple Line Chart"))
)

# Render the chart
line_chart.render("simple_line_chart.html")

'/Users/johnson/Library/CloudStorage/OneDrive-個人/Documents/Learning By Working/Pratice_SCRIPT/Py_Learn/MS_Stock/simple_line_chart.html'

In [18]:
x_data = [date.strftime("%Y-%m-%d") for date in df['Date']]  # X-axis: Dates
y_data = df[['Open', 'Close', 'Low', 'High']].values.tolist()  # Y-axis: OHLC data as a list of lists

# Print for debugging
print(f"x_data: {x_data[:5]}")  # Check the first few dates
print(f"y_data: {y_data[:5]}")  # Check the first few OHLC data points

x_data: ['1997-07-02', '1997-07-03', '1997-07-04', '1997-07-07', '1997-07-08']
y_data: [[9094.26953125, 8996.7197265625, 8988.1298828125, 9124.2998046875], [9061.599609375, 9027.6298828125, 8997.509765625, 9061.599609375], [9144.9599609375, 9192.5703125, 9119.25, 9218.2099609375], [9365.1904296875, 9322.83984375, 9304.8203125, 9370.41015625], [9345.5498046875, 9305.26953125, 9282.099609375, 9371.5]]


In [19]:
from pyecharts.charts import Kline
from pyecharts import options as opts

# Create the Kline chart
kline = (
    Kline()
    .add_xaxis(x_data)
    .add_yaxis(
        "Kline",
        y_data,
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average")]
        ),
        markpoint_opts=opts.MarkPointOpts(
            data=[opts.MarkPointItem(type_="max"), opts.MarkPointItem(type_="min")],
            symbol_size=80
        )
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="台指指数K线图", pos_left="center"),
        datazoom_opts=opts.DataZoomOpts(is_show=True),
        yaxis_opts=opts.AxisOpts(is_scale=True),
        xaxis_opts=opts.AxisOpts(type_="category"),
    )
)

# Render the chart
kline.render("台指指数图.html")

'/Users/johnson/Library/CloudStorage/OneDrive-個人/Documents/Learning By Working/Pratice_SCRIPT/Py_Learn/MS_Stock/台指指数图.html'

### 年收益率情况

In [23]:
from pyecharts.charts import Bar
from pyecharts import options as opts

# Prepare the data
attr = list(ret_Y.index)  # x-axis: years
v = list((ret_Y['年收益率'] * 100).round(2))  # y-axis: percentage returns

# Create the bar chart
bar = (
    Bar()
    .add_xaxis(attr)
    .add_yaxis("年收益率(%)", v, label_opts=opts.LabelOpts(is_show=True))
    .set_global_opts(
        title_opts=opts.TitleOpts(title="台股指数历年收益率(%)", pos_left="center"),
        datazoom_opts=opts.DataZoomOpts(is_show=True),
    )
)

# Render the chart
bar.render("台股指数历年收益率图.html")

'/Users/johnson/Library/CloudStorage/OneDrive-個人/Documents/Learning By Working/Pratice_SCRIPT/Py_Learn/MS_Stock/台股指数历年收益率图.html'

In [24]:
ret_Y.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
年收益率,28.0,0.064316,0.255069,-0.460255,-0.093512,0.092257,0.229336,0.783428


In [26]:
ret_Y.sort_values('年收益率')[:25].T

Unnamed: 0,2008,2000,2022,1998,2011,2002,2015,1997,2018,2004,...,2016,2013,2017,2001,2006,2020,2019,2024,2021,2023
年收益率,-0.460255,-0.43851,-0.224007,-0.216048,-0.211805,-0.197936,-0.104134,-0.089972,-0.086015,0.04227,...,0.109795,0.11845,0.150145,0.170175,0.194764,0.228004,0.233333,0.235753,0.23664,0.268298


In [27]:
ret_Y.sort_values('年收益率',ascending=False)[:25].T

Unnamed: 0,2009,2003,1999,2023,2021,2024,2019,2020,2006,2001,...,2007,2014,2005,2004,2018,1997,2015,2002,2011,1998
年收益率,0.783428,0.323022,0.31634,0.268298,0.23664,0.235753,0.233333,0.228004,0.194764,0.170175,...,0.087242,0.080793,0.066559,0.04227,-0.086015,-0.089972,-0.104134,-0.197936,-0.211805,-0.216048


In [28]:
ratio=int(ret_Y[ret_Y['年收益率']>0].count().values)/len(ret_Y)
print(f'上涨年份占比{ratio*100}%')

上涨年份占比67.85714285714286%


  ratio=int(ret_Y[ret_Y['年收益率']>0].count().values)/len(ret_Y)


### 月份效应检验
所谓“月份效应”，主要是指股票市场中存在某个或某些特定月份的平均收益率年复一年显著地异于其他各月平均收益率的现象。有实证研究表明，美国股票市场表现为“1月效应”，即1月份的平均收益率为正，且显著高于其他月份的平均收益。该现象最早由Wachte1（1942）发现，但直到1976年Rozef和Kinney系统地将这一异象揭示出来，“1月效应”才引起金融界的注意。美国等发达国家对“月份效应”的讨论和研究已走向成熟，但是由于A股运行时间较短，并且由于文化差异（节假日安排）导致的交易时间不一致，其“月份效应”特征可能异于我国，因此仍有必要进一步挖掘和探讨。  
检验思路：（1）将数据分成两组：m月份和其他月份；（2）检验两组数据的平均值是否相等

In [32]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

# Function to get data using yfinance
def get_data(ticker, start_date, end_date=None):
    df = yf.download(ticker, start=start_date, end=end_date)
    df.index = pd.to_datetime(df.index)  # Ensure the index is datetime
    return df

# Log return calculation
def log_ret(series):
    return np.log(series / series.shift(1))[1:]

# Convert daily returns to monthly returns
def month_rate(logret):
    return logret.resample('M').sum()

# Convert daily returns to annual returns
def annual_rate(logret):
    return np.exp(logret.resample('Y').sum()) - 1

# Fetch data
df = get_data('^TWII', '1993-01-01', '2024-08-26')

# Calculate log returns
logret = log_ret(df['Adj Close'])

# Calculate monthly returns
ret_M = month_rate(logret)

# Calculate annual returns
ret_Y = annual_rate(logret)

# Create a DataFrame to store monthly returns
df_m = pd.DataFrame(index=ret_M.index.year.unique())  # Use years as the index

# Monthly returns for each year
for i in range(1, 13):
    ret = ret_M[ret_M.index.month == i]
    ret.index = ret.index.year  # Set the index to the year for alignment
    df_m[str(i) + '月份'] = ret.reindex(df_m.index)  # Reindex to match df_m

# Summary statistics of monthly returns
summary = df_m.describe().round(3)
print(summary)

[*********************100%%**********************]  1 of 1 completed

          1月份     2月份     3月份     4月份     5月份     6月份     7月份     8月份     9月份  \
count  27.000  27.000  27.000  27.000  27.000  27.000  28.000  28.000  27.000   
mean    0.015   0.018   0.015   0.001   0.002  -0.003  -0.001  -0.010  -0.030   
std     0.073   0.054   0.051   0.062   0.049   0.066   0.060   0.050   0.082   
min    -0.123  -0.124  -0.151  -0.110  -0.082  -0.136  -0.133  -0.156  -0.215   
25%    -0.027  -0.022  -0.003  -0.042  -0.032  -0.039  -0.037  -0.034  -0.070   
50%     0.006   0.026   0.010  -0.001  -0.002  -0.004   0.004  -0.004  -0.006   
75%     0.049   0.053   0.034   0.033   0.026   0.036   0.024   0.014   0.019   
max     0.224   0.129   0.134   0.140   0.140   0.146   0.112   0.096   0.095   

         10月份    11月份    12月份  
count  27.000  27.000  27.000  
mean   -0.010   0.015   0.024  
std     0.071   0.064   0.063  
min    -0.175  -0.123  -0.112  
25%    -0.030  -0.014   0.002  
50%     0.006   0.011   0.024  
75%     0.041   0.060   0.049  
max     0.088 


  return logret.resample('M').sum()
  return np.exp(logret.resample('Y').sum()) - 1


In [36]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

# Function to get data using yfinance
def get_data(ticker, start_date, end_date=None):
    df = yf.download(ticker, start=start_date, end=end_date)
    df.index = pd.to_datetime(df.index)  # Ensure the index is datetime
    return df

# Log return calculation
def log_ret(series):
    return np.log(series / series.shift(1))[1:]

# Convert daily returns to monthly returns
def month_rate(logret):
    return logret.resample('M').sum()

# Convert daily returns to annual returns
def annual_rate(logret):
    return np.exp(logret.resample('Y').sum()) - 1

# Fetch data
df = get_data('^TWII', '1993-01-01', '2024-08-26')

# Calculate log returns
logret = log_ret(df['Adj Close'])

# Calculate monthly returns
ret_M = month_rate(logret)

# Create a DataFrame to store monthly returns, indexed by year
df_m = pd.DataFrame(index=ret_M.index.year.unique())  # Use years as the index

# Monthly returns for each year
for i in range(1, 13):
    ret = ret_M[ret_M.index.month == i]  # Extract returns for each month
    ret.index = ret.index.year           # Set the index to the year for alignment
    # Reindex to match df_m index and handle missing values
    ret_reindexed = ret.reindex(df_m.index)
    df_m[str(i) + '月份'] = ret_reindexed.values

# Summary statistics of monthly returns
summary = df_m.describe().round(3)
print(summary)

[*********************100%%**********************]  1 of 1 completed

          1月份     2月份     3月份     4月份     5月份     6月份     7月份     8月份     9月份  \
count  27.000  27.000  27.000  27.000  27.000  27.000  28.000  28.000  27.000   
mean    0.015   0.018   0.015   0.001   0.002  -0.003  -0.001  -0.010  -0.030   
std     0.073   0.054   0.051   0.062   0.049   0.066   0.060   0.050   0.082   
min    -0.123  -0.124  -0.151  -0.110  -0.082  -0.136  -0.133  -0.156  -0.215   
25%    -0.027  -0.022  -0.003  -0.042  -0.032  -0.039  -0.037  -0.034  -0.070   
50%     0.006   0.026   0.010  -0.001  -0.002  -0.004   0.004  -0.004  -0.006   
75%     0.049   0.053   0.034   0.033   0.026   0.036   0.024   0.014   0.019   
max     0.224   0.129   0.134   0.140   0.140   0.146   0.112   0.096   0.095   

         10月份    11月份    12月份  
count  27.000  27.000  27.000  
mean   -0.010   0.015   0.024  
std     0.071   0.064   0.063  
min    -0.175  -0.123  -0.112  
25%    -0.030  -0.014   0.002  
50%     0.006   0.011   0.024  
75%     0.041   0.060   0.049  
max     0.088 


  return logret.resample('M').sum()


In [39]:
# Excluding returns for a specific month and creating new DataFrame
df_exm = pd.DataFrame(index=df_m.index)
for i in range(1, 13):
    ret_exm = ret_M[ret_M.index.month != i]
    df_exm['ex' + str(i)] = ret_exm.reindex(df_exm.index).values

print(df_exm.head(27))

      ex1  ex2  ex3  ex4  ex5  ex6  ex7  ex8  ex9  ex10  ex11  ex12
Date                                                               
1997  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
1998  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
1999  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2000  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2001  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2002  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2003  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2004  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2005  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2006  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2007  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2008  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   NaN   NaN   NaN
2009  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  Na