In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
from statsmodels.tsa.seasonal import seasonal_decompose
import warnings

In [2]:
# Importing the data
df = pd.read_csv(r"C:\Users\LENOVO\Desktop\Data Downloads\Toyota_Data.csv")
df

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,1980-03-17,2.296798,3.291227,3.344743,3.291227,0.000000,41109
1,1980-03-18,2.306134,3.304606,3.358122,3.304606,0.000000,9343
2,1980-03-19,2.306134,3.304606,3.304606,3.304606,3.304606,0
3,1980-03-20,2.306134,3.304606,3.358122,3.304606,0.000000,10277
4,1980-03-21,2.362154,3.384880,3.438396,3.384880,0.000000,8409
...,...,...,...,...,...,...,...
11286,2024-12-20,178.169998,178.169998,179.919998,175.839996,175.839996,425700
11287,2024-12-23,180.449997,180.449997,180.619995,177.970001,179.119995,422700
11288,2024-12-24,181.429993,181.429993,181.720001,180.830002,181.000000,168600
11289,2024-12-26,197.360001,197.360001,198.000000,193.130005,195.970001,1281200


In [3]:
df.info

<bound method DataFrame.info of              Date   Adj Close       Close        High         Low        Open  \
0      1980-03-17    2.296798    3.291227    3.344743    3.291227    0.000000   
1      1980-03-18    2.306134    3.304606    3.358122    3.304606    0.000000   
2      1980-03-19    2.306134    3.304606    3.304606    3.304606    3.304606   
3      1980-03-20    2.306134    3.304606    3.358122    3.304606    0.000000   
4      1980-03-21    2.362154    3.384880    3.438396    3.384880    0.000000   
...           ...         ...         ...         ...         ...         ...   
11286  2024-12-20  178.169998  178.169998  179.919998  175.839996  175.839996   
11287  2024-12-23  180.449997  180.449997  180.619995  177.970001  179.119995   
11288  2024-12-24  181.429993  181.429993  181.720001  180.830002  181.000000   
11289  2024-12-26  197.360001  197.360001  198.000000  193.130005  195.970001   
11290  2024-12-27  199.520004  199.520004  201.000000  198.179993  200.360001

In [4]:
# Performing general analysis on the dataset.
df[['High', 'Low', 'Open', 'Close', 'Adj Close', 'Volume']].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
High,11291.0,72.478879,51.552662,3.237711,27.735613,66.650002,114.895,255.23
Low,11291.0,71.646418,50.976716,3.237711,27.548208,65.440002,113.625,253.59
Open,11291.0,67.955424,55.851523,0.0,0.0,66.0,114.294998,255.0
Close,11291.0,72.057776,51.295776,3.237711,27.548208,66.040001,114.295002,254.77
Adj Close,11291.0,63.586275,52.272225,2.259452,19.224636,50.5839,104.705669,254.77
Volume,11291.0,214660.442653,387425.599372,0.0,13500.0,91000.0,292300.0,18582700.0


In [5]:
# Creating new columns in the dataset containing 'Year', 'Month', and 'Day' separetely.
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day

In [6]:
# Looking for outliers on the dataset, this will helps identify anomalies in the data and get a better
# understanding of the trends within the data. 

outliers = {}
for column in ['High', 'Low', 'Open', 'Close', 'Adj Close', 'Volume']: 
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    # Defining the outlier bundaries:Values less than Q1 - 1.5 * IQR or greater than Q3 + 1.5 * IQR are considered outliers
    outliers[column] = df[(df[column] < Q1 - 1.5 * IQR) | (df[column] > Q3 + 1.5 * IQR)]
    print(f"Column: {column}")
    print(f"  Q1: {Q1}, Q3: {Q3}, IQR: {IQR}")
    print(f"  Number of outliers: {len(outliers[column])}")
    print(f"  Outliers:\n{outliers[column]}\n")

# Highest and lowest stock prices and volumes by day/month
highest_prices = df.loc[df['Adj Close'].idxmax()]
lowest_prices = df.loc[df['Adj Close'].idxmin()]
highest_volume = df.loc[df['Volume'].idxmax()]
lowest_volume = df.loc[df['Volume'].idxmin()]

print("\nHighest Adjusted Close Price:")
print(highest_prices)
print("\nLowest Adjusted Close Price:")
print(lowest_prices)
print("\nHighest Volume:")
print(highest_volume)
print("\nLowest Volume:")
print(lowest_volume)


Column: High
  Q1: 27.73561287, Q3: 114.89500045, IQR: 87.15938758
  Number of outliers: 17
  Outliers:
            Date   Adj Close       Close        High         Low        Open  \
11082 2024-03-01  246.039993  246.039993  246.539993  244.559998  244.660004   
11084 2024-03-05  248.190002  248.190002  249.990005  246.809998  247.710007   
11085 2024-03-06  252.399994  252.399994  253.429993  250.929993  252.149994   
11086 2024-03-07  247.940002  247.940002  248.050003  245.699997  246.929993   
11087 2024-03-08  242.820007  242.820007  245.800003  242.580002  245.000000   
11095 2024-03-20  246.720001  246.720001  246.800003  244.309998  244.679993   
11096 2024-03-21  251.479996  251.479996  251.789993  250.259995  251.449997   
11097 2024-03-22  254.770004  254.770004  255.229996  253.589996  255.000000   
11098 2024-03-25  252.410004  252.410004  253.419998  251.039993  252.110001   
11099 2024-03-26  253.720001  253.720001  254.789993  253.389999  253.889999   
11100 2024-03-27

In [7]:
# From this analysis, we can tell that we have quite a number of outliers in most of the columns. 
# Compared to the Honda stock that only had outier values in it's volume column.

In [8]:
# Trend analysis check, beginning with difference between individual days High and Low prices.
df['Daily_Change'] = df['High'] - df['Low']
df['Daily_Change']

0        0.053516
1        0.053516
2        0.000000
3        0.053516
4        0.053516
           ...   
11286    4.080002
11287    2.649994
11288    0.889999
11289    4.869995
11290    2.820007
Name: Daily_Change, Length: 11291, dtype: float64

In [9]:
# And the monthly average adjusted close price.
monthly_avg = df.groupby(['Year', 'Month'])['Adj Close'].mean().reset_index()
monthly_avg['Year-Month'] = monthly_avg['Year'].astype(str) + '-' + monthly_avg['Month'].astype(str).str.zfill(2)

# Creating a line chart to represent the data
fig1 = px.line(
    monthly_avg,
    x='Year-Month',
    y='Adj Close',
    title="Toyota's Monthly Average Adjusted Closing Price",
    labels={'Adj Close': 'Average Adj Close Price', 'Year-Month': 'Year-Month'}
)
# Adjusting the chart appearance
fig1.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')

# Saving the chart as an interactive html file to the local folder  
# fig1.write_html("toyota_monthly_avg_adj_close_price.html")
fig1.show()

In [10]:
# Checking for volume over time, using a bar chart.
fig2 = px.bar(
    df,
    x='Date',
    y='Volume',
    title='Toyota Volume of Trade Over Time',
    labels={'Volume': 'Trading Volume', 'Date': 'Date'},
    template='presentation'
)
fig2.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig2.write_html("Toyota_Volume_over_Time.html")
fig2.show()

In [11]:
# Checking for yearly trends under adj_close.
yearly_trends = df.groupby('Year')['Adj Close'].mean().reset_index()
fig3 = px.line(
    yearly_trends,
    x='Year',
    y='Adj Close',
    title="Toyota's Yearly Average Adjusted Close Price",
    labels={'Adj Close': 'Average Adj Close Price', 'Year': 'Year'},
)
fig3.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig3.write_html("Toyota_Yearly_Trends.html")
fig3.show()

In [12]:
# Trend analysis on the monthly average adjusted close price.
monthly_trends = df.groupby(['Year', 'Month'])['Adj Close'].mean().reset_index()
monthly_trends['Year-Month'] = monthly_trends['Year'].astype(str) + '-' + monthly_trends['Month'].astype(str).str.zfill(2)
fig4 = px.line(
    monthly_trends,
    x='Year-Month',
    y='Adj Close',
    title= "Toyota's Monthly Average Adjusted Close Price",
    labels={'Adj Close': 'Average Adj Close Price', 'Year-Month': 'Year-Month'}
)
fig4.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig4.write_html("Toyota_Monthly_Average_Adjusted_Close_Price.html")
fig4.show()

In [13]:
# Volatility Analysis.
volatility = df.groupby('Year')['Daily_Change'].mean().reset_index()
fig5 = px.bar(
    volatility,
    x='Year',
    y='Daily_Change',
    title='Average Yearly Volatility (High - Low)',
    labels={'Daily_Change': 'Average Daily Price Change', 'Year': 'Year'}
)
fig5.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig5.write_html("Toyota_Average_Yearly_Volatility.html")
fig5.show()

In [14]:
# Checking for the monthly trading volume using a bar chart
volume_analysis = df.groupby(['Year', 'Month'])['Volume'].sum().reset_index()
# Concatenating year and month into one string and converting them into strings
volume_analysis['Year-Month'] = volume_analysis['Year'].astype(str) + '-' + volume_analysis['Month'].astype(str).str.zfill(2)
fig6 = px.bar(
    volume_analysis,
    x='Year-Month',
    y='Volume',
    title="Toyota's Monthly Trading Volume",
    labels={'Volume': 'Total Trading Volume', 'Year-Month': 'Year-Month'},
    template='presentation'

)
fig6.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig6.write_html("Toyota_Monthly_Trading_Volume.html")
fig6.show()

In [15]:
# Next we look for correlations in the data
price_columns = ['High', 'Low', 'Open', 'Close', 'Adj Close']
price_correlation = df[price_columns].corr()
price_correlation

Unnamed: 0,High,Low,Open,Close,Adj Close
High,1.0,0.999924,0.991119,0.999953,0.991281
Low,0.999924,1.0,0.991014,0.999953,0.99145
Open,0.991119,0.991014,1.0,0.991121,0.979054
Close,0.999953,0.999953,0.991121,1.0,0.991369
Adj Close,0.991281,0.99145,0.979054,0.991369,1.0


In [16]:
# Creating a correlation heatmap for the stock prices.
fig7 = px.imshow(
    price_correlation,
    text_auto=True,
    title='Correlation Between Toyota Stock Prices',
    labels=dict(color='Correlation'),
    color_continuous_scale='mint' 
)
fig7.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig7.write_html("Toyota_Correlation_Heatmap.html")
fig7.show()

# Overall, the heatmap shows that there is a strong correlation between Stock prices, with all values boasting 
# a correlation coefficient of above 0.99.

In [17]:
# Making a scatter plot showing the relation between a particular day’s open and close prices.
fig8 = px.scatter(
    df,
    x='Open',
    y='Close',
    trendline='ols',
    title="Relationship Between Toyota's Open and Close Prices",
    labels={'Open': 'Opening Price', 'Close': 'Closing Price'}
)
fig8.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig8.write_html("Toyota_Open_Close_Scatter.html")
fig8.show()

In [18]:
fig9 = px.scatter(
    df,
    x='Adj Close',
    y='Close',
    trendline='ols',
    title="Relationship Between Adjacent Toyota's Close Prices and Close Prices",
    labels={'Adj Close': 'Adjacent Close Price', 'Close': 'Closing Price'}
)
fig9.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig9.write_html("Toyota_Adj_Close_and_Close_Scatter.html")
fig9.show()

In [19]:
# Volume vs daily price changes
df['Daily_Change']= df['High'] - df['Low']
fig10= px.scatter(
    df,
    x='Volume',
    y='Daily_Change',
    trendline='ols',
    title="Toyota's Volume vs. Daily Price Change",
    labels={'Volume': 'Trading Volume', 'Daily_Change': 'Daily Price Change (High - Low)'}
)
fig10.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig10.write_html("Toyota_Volume_vs_Daily_Price_Change_Scatter.html")
fig10.show()

In [20]:
# Daily percentage changes in Adj_Close
df['Daily_Percentage_Change'] = df['Adj Close'].pct_change() * 100

# Identifying the best and worst-performing days
best_day = df.loc[df['Daily_Percentage_Change'].idxmax()]
worst_day = df.loc[df['Daily_Percentage_Change'].idxmin()]

# Calculating the average daily return and cumulative return
average_daily_return = df['Daily_Percentage_Change'].mean()
df['Cumulative_Return'] = (1 + df['Daily_Percentage_Change'] / 100).cumprod()

# Moving averages for 50, 100 and 200 days
df['MA_50'] = df['Adj Close'].rolling(window=50).mean()
df['MA_100'] = df['Adj Close'].rolling(window=100).mean()
df['MA_200'] = df['Adj Close'].rolling(window=200).mean()

# Calculating the price spread
df['Price_Spread'] = df['High'] - df['Low']

In [21]:
# Investigating the daily average return, maximum and minimum return to keep in mind for future reference
average_daily_return = df['Daily_Percentage_Change'].mean()
print(f"Average Daily Percentage Change: {average_daily_return: .2f}%" )
print(f"Best Daily Percentage Change: {df.loc[df['Daily_Percentage_Change'].idxmax(), 'Daily_Percentage_Change']:.2f}%")
print(f"Worst Daily Percentage Change: {df.loc[df['Daily_Percentage_Change'].idxmin(), 'Daily_Percentage_Change']:.2f}%")

Average Daily Percentage Change:  0.06%
Best Daily Percentage Change: 19.35%
Worst Daily Percentage Change: -16.52%


In [22]:
# Daily percent change in Adj_Close prices.
fig11 = px.line(
    df,
    x='Date',
    y='Daily_Percentage_Change',
    title="Toyota's Daily Percentage Change in Adj Close Price",
    labels={'Daily_Percentage_Change': 'Daily % Change', 'Date': 'Date'}
)
fig11.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig11.write_html("Toyota_Daily_Pct_Change_in_Adj_Close.html")
fig11.show()

In [23]:
# Checking for the cumulative return over the years
fig12 = px.line(
    df,
    x='Date',
    y='Cumulative_Return',
    title="Toyota's Cumulative Return Over Time",
    labels={'Cumulative_Return': 'Cumulative Return', 'Date': 'Date'}
)
fig12.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig12.write_html("Toyota_Cumulative_Returns.html")
fig12.show()

In [24]:
# Relationship between the moving averages and the Adj_Close prices
fig13 = go.Figure() # Initialise a blank figure to plot

# Adding the traces: each trace adds a line plot(scatter) to the figure
fig13.add_trace(go.Scatter(x=df['Date'], y=df['Adj Close'], mode='lines', name='Adj Close'))
fig13.add_trace(go.Scatter(x=df['Date'], y=df['MA_50'], mode='lines', name='50-Day MA'))
fig13.add_trace(go.Scatter(x=df['Date'], y=df['MA_100'], mode='lines', name='100-Day MA'))
fig13.add_trace(go.Scatter(x=df['Date'], y=df['MA_200'], mode='lines', name='200-Day MA'))
fig13.update_layout(
    title="Toyota's Moving Averages and Adj Close Price",
    xaxis_title='Date',
    yaxis_title='Price',
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white'
)
fig13.update_layout(plot_bgcolor='tan', paper_bgcolor='tan', font_color='black')
# fig13.write_html("Toyota_Moving_Averages_vs_Adj_Close_Price.html")
fig13.show()
