In [631]:
import pandas as pd
import numpy as np
from vnstock import *
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta

In [632]:
# Folder that holds the CSV inputs (yearly_price.csv, df10.csv) loaded by the next cell.
# NOTE(review): machine-specific absolute path — adjust before running on another machine.
path = 'C:/Users/atlas/OneDrive/Desktop/vnstock'

In [633]:
# Load the two input tables from `path`.
# daily_price.csv is not loaded (its read was already commented out).
yearly_price = pd.read_csv(f'{path}/yearly_price.csv')
df = pd.read_csv(f'{path}/df10.csv')

In [None]:
# Ranking columns '1' .. '30' of df are scanned (the same columns the
# previous range(1, 32) outer loop ended up reading).
N_RANKINGS = 30


def pick_top_tickers(scores, n_rankings):
    """Build one table of top tickers per strategy variant.

    For every ranking column ``'1' .. str(n_rankings)`` the row with the
    highest value on each ``time`` is selected. Variant ``x_final = k`` is the
    concatenation of the winners of columns 1..k.

    Parameters
    ----------
    scores : pandas.DataFrame
        Must contain 'time', 'Ticker' and the ranking columns named '1', '2', ...
    n_rankings : int
        Highest ranking column to use.

    Returns
    -------
    list of pandas.DataFrame
        One frame per variant with columns
        ['time', 'Ranking', 'Top Ticker', 'x', 'x_final'].
    """
    # Winners of each column are computed exactly once; earlier code recomputed
    # them for every variant and grew a frame with concat inside the loop.
    winners = []
    for col in range(1, n_rankings + 1):
        best_idx = scores.groupby('time')[str(col)].idxmax()
        picked = (
            scores.loc[best_idx, ['time', 'Ticker']]
            .rename(columns={'Ticker': 'Top Ticker'})
            .assign(Ranking=f'ranking{col}', x=col)
        )
        picked['time'] = pd.to_datetime(picked['time'])
        winners.append(picked[['time', 'Ranking', 'Top Ticker', 'x']])

    # Variant k reuses the first k winner tables; no empty frame is ever
    # concatenated, so 'x' and 'x_final' stay integer-typed.
    return [
        pd.concat(winners[:x_final], ignore_index=True).assign(x_final=x_final)
        for x_final in range(1, n_rankings + 1)
    ]


# One frame per variant (x_final = 1 .. N_RANKINGS). The empty frame the old
# loop appended for x = 1 contributed no rows and is no longer produced.
top_tickers_df_append = pick_top_tickers(df, N_RANKINGS)

# Concatenate all variants into a single long table
top_tickers = pd.concat(top_tickers_df_append, ignore_index=True)

In [None]:
# Parse yearly_price['time'] so it can be joined against the datetime 'time' of top_tickers.
yearly_price['time'] = pd.to_datetime(yearly_price['time'])

# For every pick, fetch the 'next date' stored on the matching (time, ticker)
# row of yearly_price and keep it under the name 'buying date'.
next_dates = yearly_price[['time', 'ticker', 'next date']]
top_tickers = (
    top_tickers
    .merge(next_dates, how='left', left_on=['time', 'Top Ticker'], right_on=['time', 'ticker'])
    .drop(columns=['ticker'])
    .rename(columns={'next date': 'buying date'})
)
top_tickers['buying date'] = pd.to_datetime(top_tickers['buying date'])

In [None]:
# Attach the yearly_price row found at (buying date, ticker) to every pick.
final = top_tickers.merge(
    yearly_price,
    how='left',
    left_on=['buying date', 'Top Ticker'],
    right_on=['time', 'ticker'],
)
# 'year' is the calendar year of the buying date.
final = final.assign(year=final['buying date'].dt.year)

In [None]:
# Preview the first rows of the merged table.
final.head()

In [None]:
# Mean 'return' for every (x_final, year) pair; pairs without data are filled with 0.
output = final.pivot_table(
    values='return',
    index='x_final',
    columns='year',
    aggfunc='mean',
    fill_value=0,
).reset_index()

# Row-wise mean over the year columns, skipping column 0 (x_final) and the
# last year column.
# NOTE(review): the last year is left out of the average and empty cells count
# as 0 — confirm both are intended.
output['avg_annual_return'] = output.iloc[:, 1:-1].mean(axis=1)
output = output.sort_values('avg_annual_return', ascending=False)

In [None]:
# Identify the x_final with the highest avg_annual_return.
best_row = output['avg_annual_return'].idxmax()
x_final_max_return = output.at[best_row, 'x_final']
print(x_final_max_return)

# Keep that variant's row, strip the leading x_final column and the trailing
# avg_annual_return column, then flip the remaining year columns into rows.
con4 = output['x_final'].eq(x_final_max_return)
max_output = output.loc[con4].iloc[:, 1:-1].T.reset_index()

# After the transpose the value column is labelled with the selected row's
# index; give it a descriptive name.
value_col = max_output.columns[1]
max_output = max_output.rename(columns={value_col: 'avg_return_model'})
max_output['year'] = max_output['year'].astype(int)
max_output.head()

In [None]:
import matplotlib.pyplot as plt


def _to_percent(value):
    """Return `value` expressed in percent.

    Strings such as '12.3%' are already in percent and are only parsed;
    numeric values are fractional returns and are scaled by 100.
    """
    if isinstance(value, str):
        return float(value.strip('%'))
    return value * 100


# Work on a sorted copy and a derived series so `output` itself is not modified.
output_sorted = output.sort_values(by='x_final')
avg_return_pct = output_sorted['avg_annual_return'].map(_to_percent)

# Line chart of the average annual return (in percent, matching the axis label)
fig, ax = plt.subplots()
ax.plot(output_sorted['x_final'], avg_return_pct)
ax.set_xlabel('x_final')
ax.set_ylabel('Average Annual Return (%)')
ax.set_title('Average Annual Return vs x_final')
ax.grid(True)

# One tick per integer x_final value
ax.set_xticks(range(int(output_sorted['x_final'].min()), int(output_sorted['x_final'].max()) + 1, 1))

plt.show()


In [None]:
# Market benchmark: mean 'return' of all yearly_price rows in each year,
# returned as a two-column frame ['year', 'avg_return_market'].
avg_return_market = (
    yearly_price.groupby('year')['return']
    .mean()
    .rename('avg_return_market')
    .reset_index()
)
avg_return_market.head(2)

In [None]:
# Put the model's yearly return next to the market's yearly return (one row per model year).
return_comparison = max_output.merge(avg_return_market, how='left', on='year')

In [None]:
# gap_year = calendar years elapsed between each row's year and today.
current_year = pd.Timestamp.today().year
return_comparison = return_comparison.assign(
    gap_year=current_year - return_comparison['year']
)

In [None]:
# Preview the first two rows of the model-vs-market comparison table.
return_comparison.head(2)

In [None]:
# Trailing average close price per ticker, built only from rows that precede
# the current row (shift() removes the current close from its own average).
# Missing closes are skipped instead of skewing the sum/count arithmetic, and
# no temporary columns are written to yearly_price.
# NOTE(review): assumes rows are in chronological order within each ticker and
# that there is one row per ticker per year — confirm against the CSV.
close_by_ticker = yearly_price.groupby('ticker')['close']

# Mean of all earlier closes of the ticker (NaN on the ticker's first row)
yearly_price['avg_price_all_history'] = close_by_ticker.transform(
    lambda s: s.shift().expanding().mean()
)

# Mean of up to the three preceding closes of the ticker (NaN on the first row)
yearly_price['avg_price_3years'] = close_by_ticker.transform(
    lambda s: s.shift().rolling(window=3, min_periods=1).mean()
)

# One row per (ticker, year): GroupBy.last() keeps, for each column, the last
# non-null value within that pair.
new_df = yearly_price.groupby(['ticker', 'year']).last().reset_index()
new_df.head(3)


In [None]:
# Year whose picks are displayed at the end of this cell.
WATCH_YEAR = 2023

# Trades of the best-performing variant (x_final_max_return), newest first.
# The source table is `final`: `merged_df_final` is not defined in the visible
# notebook, while `final` holds x_final together with the yearly_price columns
# selected below.
stock_watch = (
    final.loc[
        final['x_final'] == x_final_max_return,
        ['x_final', 'ticker', 'buying date', 'next date', 'close', 'next year price', 'return', 'year'],
    ]
    .sort_values(by='buying date', ascending=False)
    .copy()  # independent frame, so the column write below never targets a slice
)

# Render the fractional return as a percentage string, e.g. 0.1234 -> '12.34%'
stock_watch['return'] = (stock_watch['return'] * 100).map('{:.2f}%'.format)

stock_watch = stock_watch.rename(
    columns={'close': 'buying price', 'next date': 'selling date', 'next year price': 'selling price'}
)
stock_watch[stock_watch['year'] == WATCH_YEAR]

In [None]:

# Build a NEW display frame: first column (x_final) unchanged, every other
# column rendered as a percentage string with one decimal.
# `output` itself stays numeric, so this cell can be re-run and no strings are
# written into float columns.
output_pct = pd.concat(
    [
        output.iloc[:, :1],
        output.iloc[:, 1:].apply(lambda col: col.map(lambda val: f"{val * 100:.1f}%")),
    ],
    axis=1,
)
output_pct.head()

In [None]:
# Show the watch-list rows whose 'year' is 2023.
stock_watch.loc[stock_watch['year'].eq(2023)]

In [None]:
# Restrict the comparison to rows whose gap_year is at least 2.
mature = return_comparison.loc[return_comparison['gap_year'].ge(2)]
avg_return_model_condition = mature['avg_return_model'].mean()
avg_return_market_condition = mature['avg_return_market'].mean()

# Single-row summary table holding the two averages side by side
new_df = pd.DataFrame(
    [[avg_return_model_condition, avg_return_market_condition]],
    columns=['avg_return_model_condition', 'avg_return_market_condition'],
)

new_df

In [None]:
import matplotlib.pyplot as plt

# Average model and market return over rows with gap_year >= 2
older_rows = return_comparison['gap_year'] >= 2
avg_return_model_condition = return_comparison.loc[older_rows, 'avg_return_model'].mean()
avg_return_market_condition = return_comparison.loc[older_rows, 'avg_return_market'].mean()

# One bar per return type
bars = {
    'avg_return_model': avg_return_model_condition,
    'avg_return_market': avg_return_market_condition,
}
labels = list(bars.keys())
values = list(bars.values())

plt.bar(labels, values, color=['blue', 'green'])
plt.title('Comparison of Average Returns')
plt.xlabel('Return Type')
plt.ylabel('Average Return')
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Chronological order so the lines run left to right
return_comparison_sorted = return_comparison.sort_values(by='year')

# One colour per series, used for both the line and its data points (the
# market line used to get the default colour while its points were green).
series_colors = {'avg_return_model': 'blue', 'avg_return_market': 'green'}

fig, ax = plt.subplots()
for column, color in series_colors.items():
    # marker='o' draws the data points on the line itself, replacing the
    # separate scatter calls
    ax.plot(
        return_comparison_sorted['year'],
        return_comparison_sorted[column],
        color=color,
        marker='o',
        label=column,
    )

# Keep grid lines behind the plotted data
ax.set_axisbelow(True)
ax.grid(True)

# Labels and title
ax.set_xlabel('Year')
ax.set_ylabel('Average Return')
ax.set_title('Comparison of Average Returns Over the Years')

# One tick per year present in the data
ax.set_xticks(return_comparison_sorted['year'])

ax.legend()

plt.show()
