<a href="https://colab.research.google.com/github/radosty/radosty.github.io/blob/main/visualisation2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import pandas as pd
from datetime import datetime

# List of fintech and companies that offer fintech services
tickers = [
    'PYPL', 'SQ', 'ADYEN.AS', 'WISE.L', 'AFRM', 'SOFI', 'UPST',
    'JPM', 'BAC', 'WFC', 'C', 'GS', 'V', 'MA', 'SHOP', 'MELI', 'SE'
]

# Define the dates of interest
start_date = '2021-01-01'
end_date = '2021-12-31'

# Create an empty DataFrame to store all data
all_stock_data = pd.DataFrame()

for ticker in tickers:
    try:
        # Create a Ticker object
        stock = yf.Ticker(ticker)

        # Fetch historical data
        hist = stock.history(start=start_date, end=end_date, interval="1mo")

        if hist.empty:
            print(f"No data for {ticker}, skipping.")
            continue

        # Reset index to make Date a column
        hist.reset_index(inplace=True)

        # Add Ticker column
        hist['Ticker'] = ticker

        # Calculate monthly return
        hist['Monthly Return (%)'] = hist['Close'].pct_change() * 100

        # Select relevant columns
        hist = hist[['Date', 'Ticker', 'Close', 'Monthly Return (%)']]

        # Append to the main DataFrame
        all_stock_data = pd.concat([all_stock_data, hist], ignore_index=True)

    except Exception as e:
        print(f"Error processing {ticker}: {e}")

# Display the first few rows of the combined data
print(all_stock_data.head())

# Display column names
print("\nColumns in all_stock_data:")
print(all_stock_data.columns)

# Save the raw data
all_stock_data.to_csv("raw_fintech_stock_data_2021.csv", index=False)
print("\nRaw fintech stock data saved.")

# Calculate average monthly performance
average_monthly_performance = all_stock_data.groupby(['Date', 'Ticker']).agg({
    'Close': 'mean',
    'Monthly Return (%)': 'mean'
}).reset_index()

# Calculate average across all tickers
final_performance = average_monthly_performance.groupby('Date').agg({
    'Close': 'mean',
    'Monthly Return (%)': 'mean'
}).reset_index()

# Rename columns as it makes more sense
final_performance.rename(columns={
    'Close': 'Average Closing Price',
    'Monthly Return (%)': 'Average Monthly Return (%)'
}, inplace=True)

# Save the averaged performance data
final_performance.to_csv("average_fintech_performance_2021.csv", index=False)
print("Average fintech performance data saved.")

                        Date Ticker       Close  Monthly Return (%)
0  2021-01-01 00:00:00-05:00   PYPL  234.309998                 NaN
1  2021-02-01 00:00:00-05:00   PYPL  259.850006           10.900093
2  2021-03-01 00:00:00-05:00   PYPL  242.839996           -6.546088
3  2021-04-01 00:00:00-04:00   PYPL  262.290009            8.009394
4  2021-05-01 00:00:00-04:00   PYPL  260.019989           -0.865462

Columns in all_stock_data:
Index(['Date', 'Ticker', 'Close', 'Monthly Return (%)'], dtype='object')

Raw fintech stock data saved.
Average fintech performance data saved.


In [1]:
import yfinance as yf
import pandas as pd

tickers = ['^GSPC', '^IXIC']  # S&P 500 and NASDAQ

start_date = '2021-01-01'
end_date = '2021-12-31'

all_index_data = pd.DataFrame()

for ticker in tickers:
    try:

        index = yf.Ticker(ticker)


        hist = index.history(start=start_date, end=end_date, interval="1mo")

        if hist.empty:
            print(f"No data for {ticker}, skipping.")
            continue


        hist.reset_index(inplace=True)


        hist['Ticker'] = 'S&P 500' if ticker == '^GSPC' else 'NASDAQ'


        hist['Monthly Return (%)'] = hist['Close'].pct_change() * 100


        hist = hist[['Date', 'Ticker', 'Close', 'Monthly Return (%)']]


        all_index_data = pd.concat([all_index_data, hist], ignore_index=True)

    except Exception as e:
        print(f"Error processing {ticker}: {e}")


all_index_data.dropna(inplace=True)

print(all_index_data.head())


all_index_data.to_csv("raw_index_data_2021.csv", index=False)
print("\nRaw index data saved.")


average_monthly_performance = all_index_data.groupby(['Date', 'Ticker']).agg({
    'Close': 'mean',
    'Monthly Return (%)': 'mean'
}).reset_index()


average_monthly_performance.dropna(inplace=True)

average_monthly_performance.to_csv("average_index_performance_2021.csv", index=False)

                       Date   Ticker        Close  Monthly Return (%)
1 2021-02-01 00:00:00-05:00  S&P 500  3811.149902            2.609145
2 2021-03-01 00:00:00-05:00  S&P 500  3972.889893            4.243863
3 2021-04-01 00:00:00-04:00  S&P 500  4181.169922            5.242532
4 2021-05-01 00:00:00-04:00  S&P 500  4204.109863            0.548649
5 2021-06-01 00:00:00-04:00  S&P 500  4297.500000            2.221401

Raw index data saved.


In [2]:
import pandas as pd


url_sp500_nasdaq = 'https://raw.githubusercontent.com/radosty/radosty.github.io/refs/heads/main/s%26p500andnasdaqstock.csv'
sp500_nasdaq_data = pd.read_csv(url_sp500_nasdaq)
url_fintech = 'https://raw.githubusercontent.com/radosty/radosty.github.io/refs/heads/main/fintechstock2.csv'
fintech_data = pd.read_csv(url_fintech)
sp500_nasdaq_data['Date'] = pd.to_datetime(sp500_nasdaq_data['Date'], utc=True).dt.tz_convert(None)
fintech_data['Date'] = pd.to_datetime(fintech_data['Date'], utc=True).dt.tz_convert(None)

# Rename columns in fintech_data to match sp500_nasdaq_data
fintech_data.rename(columns={
    'Average Closing Price': 'Close',
    'Average Monthly Return (%)': 'Monthly Return (%)'
}, inplace=True)

# Add 'Ticker' column to fintech_data
fintech_data['Ticker'] = 'Fintech'

# Combine the datasets
merged_data = pd.concat([sp500_nasdaq_data, fintech_data], ignore_index=True)

# Add new rows for January 1, 2021
new_data = pd.DataFrame({
    'Date': [pd.to_datetime('2021-01-01'), pd.to_datetime('2021-01-01')],
    'Ticker': ['S&P 500', 'NASDAQ'],
    'Close': [3793.75, None],
    'Monthly Return (%)': [None, 0.87]
})

# Concatenate the new data with the existing dataset
merged_data = pd.concat([merged_data, new_data], ignore_index=True)

# Group by year, month, and ticker, then take the last available value
merged_data['Year'] = merged_data['Date'].dt.year
merged_data['Month'] = merged_data['Date'].dt.month
grouped_data = merged_data.groupby(['Year', 'Month', 'Ticker']).last().reset_index()

# Recreate the Date column
grouped_data['Date'] = pd.to_datetime(grouped_data['Year'].astype(str) + '-' + grouped_data['Month'].astype(str) + '-01')

# Drop the Year and Month columns
grouped_data = grouped_data.drop(['Year', 'Month'], axis=1)

# Sort the dataset by Date and Ticker
grouped_data = grouped_data.sort_values(['Date', 'Ticker'])

# Impute missing values in 'Monthly Return (%)' with the mean
mean_monthly_return = grouped_data['Monthly Return (%)'].mean()
grouped_data['Monthly Return (%)'].fillna(mean_monthly_return, inplace=True)
mean_closing = grouped_data['Close'].mean()
grouped_data['Close'].fillna(mean_closing, inplace= True)

grouped_data.to_csv('monthly_stock_data3.csv', index=False)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  grouped_data['Monthly Return (%)'].fillna(mean_monthly_return, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  grouped_data['Close'].fillna(mean_closing, inplace= True)
