In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
import plotly.offline as pyo
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from plotly.subplots import make_subplots
import plotly.express as px
import altair as alt

In [2]:
!pip install dash plotly
!pip install mplfinance



In [3]:
data = pd.read_csv('/content/NIFTY50_1D_Data.csv')

## **NIFTY_50 Data Description**

###Date: The trading day.
###Open: The index value at the beginning of the trading day.
###High: The highest index value during the trading day.
###Low: The lowest index value during the trading day.
###Close: The index value at the end of the trading day.

In [4]:
data['Date'] = pd.to_datetime(data['Date'])

In [5]:
data.describe()

Unnamed: 0,Open,High,Low,Close
count,3543.0,3543.0,3543.0,3543.0
mean,9955.231075,10004.615437,9889.987783,9948.448876
std,4442.009085,4455.669395,4419.637949,4439.504736
min,4567.2998,4623.1499,4531.1499,4544.2002
25%,5937.05005,5971.1001,5892.5,5936.94995
50%,8694.2998,8728.3496,8631.1504,8683.8496
75%,11870.625,11926.3,11794.425,11845.825
max,21737.65,21801.449,21678.0,21778.699


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3543 entries, 0 to 3542
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    3543 non-null   datetime64[ns]
 1   Open    3543 non-null   float64       
 2   High    3543 non-null   float64       
 3   Low     3543 non-null   float64       
 4   Close   3543 non-null   float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 138.5 KB


In [7]:
trace_open = go.Scatter(x=data['Date'], y=data['Open'], mode='lines', name='Open')
trace_high = go.Scatter(x=data['Date'], y=data['High'], mode='lines', name='High')
trace_low = go.Scatter(x=data['Date'], y=data['Low'], mode='lines', name='Low')
trace_close = go.Scatter(x=data['Date'], y=data['Close'], mode='lines', name='Close')

data1 = [trace_open, trace_high, trace_low, trace_close]

layout = go.Layout(title='Stock Prices',
                   xaxis=dict(title='Date', rangeslider=dict(visible=True), rangebreaks=[
                       dict(bounds=["sat", "mon"]),
                       dict(values=["2010-01-01", "2011-01-01", "2012-01-01", "2013-01-01",
                                    "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01",
                                    "2018-01-01", "2019-01-01", "2020-01-01", "2021-01-01",
                                    "2022-01-01", "2023-01-01"],
                            )
                   ]),
                   yaxis=dict(title='Price'),
                   showlegend=True)

fig = go.Figure(data=data1, layout=layout)
pyo.iplot(fig)

In [8]:
data.set_index('Date', inplace=True)

## **Hourly Data Analysis**

In [9]:
hourly_data = data.copy()

In [10]:
hourly_data['Average'] = (hourly_data['Low'] + hourly_data['Open'] + hourly_data['Close'] + hourly_data['High']) / 4

In [11]:
hourly_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Average
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-09-01,4662.2002,4735.8999,4600.6499,4625.3501,4656.025025
2009-09-02,4624.0,4650.4502,4576.6001,4608.3501,4614.8501
2009-09-03,4608.75,4647.3501,4581.0498,4593.5498,4607.674925
2009-09-04,4608.7002,4697.2002,4580.3501,4680.3999,4641.6626
2009-09-07,4682.3999,4790.0,4679.2998,4782.8999,4733.6499


In [12]:
hourly_data['Daily_Returns(%)'] = ((hourly_data['Average'] - hourly_data['Average'].shift(1)) / hourly_data['Average'].shift(1)) * 100

In [13]:
hourly_data['Daily_Returns(%)'].fillna('None', inplace=True)

In [14]:
hourly_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Average,Daily_Returns(%)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-09-01,4662.2002,4735.8999,4600.6499,4625.3501,4656.025025,
2009-09-02,4624.0,4650.4502,4576.6001,4608.3501,4614.8501,-0.884336
2009-09-03,4608.75,4647.3501,4581.0498,4593.5498,4607.674925,-0.15548
2009-09-04,4608.7002,4697.2002,4580.3501,4680.3999,4641.6626,0.737632
2009-09-07,4682.3999,4790.0,4679.2998,4782.8999,4733.6499,1.981775


In [15]:
hourly_data['trend'] = 'BEAR'
hourly_data.loc[hourly_data.Open > hourly_data.Average.shift(1), 'trend'] = 'BULL'
hourly_data['trend'][0] = None



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [16]:
hourly_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Average,Daily_Returns(%),trend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2009-09-01,4662.2002,4735.8999,4600.6499,4625.3501,4656.025025,,
2009-09-02,4624.0,4650.4502,4576.6001,4608.3501,4614.8501,-0.884336,BEAR
2009-09-03,4608.75,4647.3501,4581.0498,4593.5498,4607.674925,-0.15548,BEAR
2009-09-04,4608.7002,4697.2002,4580.3501,4680.3999,4641.6626,0.737632,BULL
2009-09-07,4682.3999,4790.0,4679.2998,4782.8999,4733.6499,1.981775,BULL


The `trend_probability` function calculates the probability of bullish and bearish trends in financial data over specified timeframes (weekly, monthly, or yearly). It takes a Pandas DataFrame with a 'trend' column indicating 'BULL' or 'BEAR' trends for each entry and resamples this data according to the given timeframe. Within each period, it computes the ratio of bullish to bearish entries, providing an understanding of trend dominance over time. The result is a DataFrame with probabilities of bullish and bearish trends for each resampled period.

In [17]:
import pandas as pd

def trend_probability(data, timeframe):
    """
    Calculates the probability of a trend for a given timeframe.

    Args:
      data: A Pandas DataFrame containing the data.
      timeframe: The timeframe for which to calculate the probability ('W' for weekly, 'M' for monthly, 'Y' for yearly).

    Returns:
      A dictionary containing the probability of a BULL and BEAR trend.
    """

    # Ensure the index is in datetime format for resampling
    if not pd.api.types.is_datetime64_any_dtype(data.index):
        data.index = pd.to_datetime(data.index)

    # Define a function to calculate probabilities within each group
    def calculate_probabilities(group):
        bull_count = group[group['trend'] == 'BULL'].shape[0]
        bear_count = group[group['trend'] == 'BEAR'].shape[0]
        total_count = bull_count + bear_count

        # Avoid division by zero
        if total_count > 0:
            bull_prob = bull_count / total_count
            bear_prob = bear_count / total_count
        else:
            bull_prob, bear_prob = 0, 0

        return pd.Series({'BULL': bull_prob, 'BEAR': bear_prob})

    probabilities = data.resample(timeframe).apply(calculate_probabilities)

    return probabilities

In [18]:
week_BULL_BEAR_prob = trend_probability(hourly_data, 'W')
month_BULL_BEAR_prob = trend_probability(hourly_data, 'M')
year_BULL_BEAR_prob = trend_probability(hourly_data, 'Y')

total hear dates from 2009 to 2023 In between these years we has 3 elections we are trying to found what happens during (2009, 2014, 2019)

In [19]:
week_BULL_BEAR_prob.head()

Unnamed: 0_level_0,BULL,BEAR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-09-06,0.333333,0.666667
2009-09-13,0.8,0.2
2009-09-20,0.6,0.4
2009-09-27,0.75,0.25
2009-10-04,0.666667,0.333333


In [20]:
month_BULL_BEAR_prob.head()

Unnamed: 0_level_0,BULL,BEAR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-09-30,0.631579,0.368421
2009-10-31,0.263158,0.736842
2009-11-30,0.65,0.35
2009-12-31,0.428571,0.571429
2010-01-31,0.368421,0.631579


In [21]:
year_BULL_BEAR_prob.head()

Unnamed: 0_level_0,BULL,BEAR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-12-31,0.493671,0.506329
2010-12-31,0.581673,0.418327
2011-12-31,0.506073,0.493927
2012-12-31,0.550607,0.449393
2013-12-31,0.504032,0.495968


I analyzed the monthly probabilities of bullish and bearish trends in the dataset to understand the dominant market trends on a monthly basis.

In [22]:
month_BULL_BEAR_prob.reset_index(inplace=True)
month_BULL_BEAR_prob['Date'] = pd.to_datetime(month_BULL_BEAR_prob['Date'])

bull_data = month_BULL_BEAR_prob[['Date', 'BULL']]

# Further filter to include only the years 2009, 2014, and 2019
selected_years_bull = bull_data[bull_data['Date'].dt.year.isin([2009, 2014, 2019])]

# Create an interactive selection
selection = alt.selection_multi(fields=['year(Date)'], bind='legend')

# Plot
chart = alt.Chart(selected_years_bull).mark_line().encode(
    x='month(Date):O',  # O for ordinal
    y='BULL:Q',
    color='year(Date):N',
    tooltip=['year(Date)', 'month(Date)', 'BULL'],
    opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
).add_selection(
    selection
).properties(
    title='Yearly BULL Trend Comparison for 2009, 2014, and 2019',
    width=600,
    height=400
)

chart.display()


'selection_multi' is deprecated.  Use 'selection_point'


'add_selection' is deprecated. Use 'add_params' instead.



In [23]:
import altair as alt
import pandas as pd

# Assuming 'month_BULL_BEAR_prob' is your DataFrame and it's indexed by 'Date'

# Filter out only BULL data
bull_data = month_BULL_BEAR_prob[['Date', 'BULL']]

# Filter to exclude the years 2009, 2014, and 2019
other_years_bull = bull_data[~bull_data['Date'].dt.year.isin([2009, 2014, 2019])]

# Create an interactive selection
selection = alt.selection_multi(fields=['year(Date)'], bind='legend')

# Plot for all years except 2009, 2014, and 2019
chart_all_except_selected = alt.Chart(other_years_bull).mark_line().encode(
    x='month(Date):O',  # O for ordinal
    y='BULL:Q',
    color=alt.Color('year(Date):N', scale=alt.Scale(scheme='category20b')),  # Using a 20-color scheme for distinction
    tooltip=['year(Date)', 'month(Date)', 'BULL'],
    opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
).add_selection(
    selection
).properties(
    title='Yearly BULL Trend Comparison (Excluding 2009, 2014, and 2019)',
    width=600,
    height=400
)

chart_all_except_selected.display()



'selection_multi' is deprecated.  Use 'selection_point'


'add_selection' is deprecated. Use 'add_params' instead.



### **Conclusion:01**

In our analysis of the NIFTY 50 index focusing on monthly bullish trend probabilities, especially during the general election years of 2009, 2014, and 2019, we observed several key patterns:

- **Early March Peaks**: In election years, there's a significant peak in bullish trend probabilities during the early weeks of March. This is above the average when compared to other years where peaks are more common towards the end of March and the beginning of April.

- **Trend Duration**: Once a bullish trend begins, it typically lasts for around three months before it starts to decline or reverse. This suggests a window of opportunity for investors following the start of a bullish trend.

- **Election Year Fluctuations**: Major market fluctuations tend to occur approximately every three years, coinciding with election years. These fluctuations can be upwards or downwards, indicating heightened market volatility and investor uncertainty during these times.

- **Increased Bullish Probability**: During election years, the probability of a bullish trend, indicating potential market upswings, noticeably increases. For example:
  - In 2009, the bullish probability in October was 0.2631, which is relatively low.
  - By contrast, in 2014 and 2019, the probabilities rose to 0.5555 and 0.8, respectively, showing a marked increase in investor optimism or market momentum.

- **Additional Investment Windows**: Aside from the March peaks in election years, another period of interest is between mid-September and mid-November. This period also shows potential for market fluctuations, providing another strategic investment window.

- **Conclusion and Strategic Insight**:
  - The early weeks of March, particularly in election years, offer a high probability for bullish trends, signaling a favorable time for investment.
  - The period from September to mid-November also presents opportunities for market fluctuations, which, with careful analysis, could yield significant returns.
  - Observations from 2009, 2014, and 2019 highlight the impact of political events on market dynamics, emphasizing the importance of political awareness in investment strategy.

## **Weekly Data Analysis**

In [24]:
weekly_data = data.resample('W').mean()

In [25]:
weekly_data.head()

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2009-09-06,4625.9126,4682.7251,4584.662475,4626.912475
2009-09-13,4780.77998,4840.48994,4768.97988,4810.26992
2009-09-20,4891.17002,4936.37998,4873.05996,4920.13994
2009-09-27,4989.825075,5019.525025,4942.362425,4983.9126
2009-10-04,5018.0,5072.783367,5006.833333,5058.066733


In [26]:
weekly_data['Average'] = (weekly_data['Low'] + weekly_data['Open'] + weekly_data['Close'] + weekly_data['High']) / 4

In [27]:
weekly_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Average
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-09-06,4625.9126,4682.7251,4584.662475,4626.912475,4630.053163
2009-09-13,4780.77998,4840.48994,4768.97988,4810.26992,4800.12993
2009-09-20,4891.17002,4936.37998,4873.05996,4920.13994,4905.187475
2009-09-27,4989.825075,5019.525025,4942.362425,4983.9126,4983.906281
2009-10-04,5018.0,5072.783367,5006.833333,5058.066733,5038.920858


In [28]:
# weekly_data['Average_Return(%)'] = weekly_data['Average'].pct_change() * 100

In [29]:
weekly_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Average
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-09-06,4625.9126,4682.7251,4584.662475,4626.912475,4630.053163
2009-09-13,4780.77998,4840.48994,4768.97988,4810.26992,4800.12993
2009-09-20,4891.17002,4936.37998,4873.05996,4920.13994,4905.187475
2009-09-27,4989.825075,5019.525025,4942.362425,4983.9126,4983.906281
2009-10-04,5018.0,5072.783367,5006.833333,5058.066733,5038.920858


In [30]:
weekly_data['NEW(%)'] = ((weekly_data['Open'] - weekly_data['Average'].shift(1)) / weekly_data['Average'].shift(1)) * 100

In [31]:
weekly_data

Unnamed: 0_level_0,Open,High,Low,Close,Average,NEW(%)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-09-06,4625.912600,4682.725100,4584.662475,4626.912475,4630.053163,
2009-09-13,4780.779980,4840.489940,4768.979880,4810.269920,4800.129930,3.255401
2009-09-20,4891.170020,4936.379980,4873.059960,4920.139940,4905.187475,1.896617
2009-09-27,4989.825075,5019.525025,4942.362425,4983.912600,4983.906281,1.725471
2009-10-04,5018.000000,5072.783367,5006.833333,5058.066733,5038.920858,0.684076
...,...,...,...,...,...,...
2023-12-03,20030.950250,20117.937500,19988.962500,20096.837250,20058.671875,1.265341
2023-12-10,20845.619800,20895.200000,20756.910000,20870.030000,20841.939950,3.923230
2023-12-17,21062.290200,21143.440200,20974.019800,21093.839800,21068.397500,1.057244
2023-12-24,21357.150000,21451.940400,21199.940000,21325.270200,21333.575150,1.370548


In [32]:
weekly_data['Upper_Range'] = weekly_data['NEW(%)'] + 1
weekly_data['Lower_Range'] = weekly_data['NEW(%)'] - 1

In [33]:
weekly_data

Unnamed: 0_level_0,Open,High,Low,Close,Average,NEW(%),Upper_Range,Lower_Range
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2009-09-06,4625.912600,4682.725100,4584.662475,4626.912475,4630.053163,,,
2009-09-13,4780.779980,4840.489940,4768.979880,4810.269920,4800.129930,3.255401,4.255401,2.255401
2009-09-20,4891.170020,4936.379980,4873.059960,4920.139940,4905.187475,1.896617,2.896617,0.896617
2009-09-27,4989.825075,5019.525025,4942.362425,4983.912600,4983.906281,1.725471,2.725471,0.725471
2009-10-04,5018.000000,5072.783367,5006.833333,5058.066733,5038.920858,0.684076,1.684076,-0.315924
...,...,...,...,...,...,...,...,...
2023-12-03,20030.950250,20117.937500,19988.962500,20096.837250,20058.671875,1.265341,2.265341,0.265341
2023-12-10,20845.619800,20895.200000,20756.910000,20870.030000,20841.939950,3.923230,4.923230,2.923230
2023-12-17,21062.290200,21143.440200,20974.019800,21093.839800,21068.397500,1.057244,2.057244,0.057244
2023-12-24,21357.150000,21451.940400,21199.940000,21325.270200,21333.575150,1.370548,2.370548,0.370548


In [34]:
weekly_data['Within_Bounds'] = ((weekly_data['NEW(%)'] >= weekly_data['Lower_Range']) & (weekly_data['NEW(%)'] <= weekly_data['Upper_Range'])).astype(int)

In [35]:
weekly_data

Unnamed: 0_level_0,Open,High,Low,Close,Average,NEW(%),Upper_Range,Lower_Range,Within_Bounds
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2009-09-06,4625.912600,4682.725100,4584.662475,4626.912475,4630.053163,,,,0
2009-09-13,4780.779980,4840.489940,4768.979880,4810.269920,4800.129930,3.255401,4.255401,2.255401,1
2009-09-20,4891.170020,4936.379980,4873.059960,4920.139940,4905.187475,1.896617,2.896617,0.896617,1
2009-09-27,4989.825075,5019.525025,4942.362425,4983.912600,4983.906281,1.725471,2.725471,0.725471,1
2009-10-04,5018.000000,5072.783367,5006.833333,5058.066733,5038.920858,0.684076,1.684076,-0.315924,1
...,...,...,...,...,...,...,...,...,...
2023-12-03,20030.950250,20117.937500,19988.962500,20096.837250,20058.671875,1.265341,2.265341,0.265341,1
2023-12-10,20845.619800,20895.200000,20756.910000,20870.030000,20841.939950,3.923230,4.923230,2.923230,1
2023-12-17,21062.290200,21143.440200,20974.019800,21093.839800,21068.397500,1.057244,2.057244,0.057244,1
2023-12-24,21357.150000,21451.940400,21199.940000,21325.270200,21333.575150,1.370548,2.370548,0.370548,1


In [36]:
weekly_data['open_upper'] = weekly_data['Open'] + (weekly_data['Open'] * weekly_data['Upper_Range'] * 0.01)
weekly_data['open_lower'] = weekly_data['Open'] - (weekly_data['Open'] * weekly_data['Lower_Range'] * 0.01)

In [37]:
weekly_data['indication'] = np.where(
    (weekly_data['Open'] >= weekly_data['open_lower']) &
    (weekly_data['Open'] <= weekly_data['open_upper']) &
    (weekly_data['Close'] >= weekly_data['open_lower']) &
    (weekly_data['Close'] <= weekly_data['open_upper']) &
    (weekly_data['High'] >= weekly_data['open_lower']) &
    (weekly_data['High'] <= weekly_data['open_upper']) &
    (weekly_data['Low'] >= weekly_data['open_lower']) &
    (weekly_data['Low'] <= weekly_data['open_upper']),
    0, 1)

In [38]:
weekly_data['indication'].value_counts(1)

1    0.768717
0    0.231283
Name: indication, dtype: float64

### **Conclusion:02**

In our study of the NIFTY 50 dataset, we delved into an innovative approach to forecasting daily trading range probabilities based on the opening value for the day (Open) and the average value from the previous day (Average). Here's a structured summary of our methodology and findings:

- **Data Preparation**:
  - We introduced a new column to hold the average of all rows, thus incorporating a historical perspective into our analysis.
  - We devised a formula to calculate the percentage change between the current day's opening value and the previous day's average, aiming to gauge the day-to-day volatility.

- **Formula for Percentage Change**:
  - Our key formula: `NEW(%) = ((Open - Average.shift(1)) / Average.shift(1)) * 100`
  - This formula calculates the relative change and expresses it as a percentage, serving as a basis for our predictive model.

- **Defining Range Bounds**:
  - With the derived percentages, we established a range by adding and subtracting a certain alpha percentage value to/from the 'Open' value, setting upper and lower bounds for our expected trading range.

- **Range Validation**:
  - A new column was created to validate whether the day's trading values (Open, Close, High, Low) fell within our calculated range. A value of 1 was assigned if all values were within bounds, indicating a successful prediction, and 0 otherwise.

- **Experimental Results**:
  - We experimented with different alpha values to adjust the range flexibility and observed the following probabilities for successful range predictions:
    - For alpha = 0.5, the probability of success was 0.65508.
    - For alpha = 1, the probability increased to 0.768717.
    - For alpha = 1.5, the probability further rose to 0.860963.

- **Conclusions**:
  - The results demonstrate a clear correlation between the alpha value and the probability of the day's trading values falling within our predicted range. As the alpha value increases, providing a wider range, the probability of encapsulating the trading values also increases.
  - This method provides a straightforward yet effective way to anticipate the trading range based on the previous day's average and the current day's opening value. It highlights the potential of historical data in enhancing the accuracy of market predictions.

- **Practical Implications**:
  - Traders and analysts can utilize this approach to set more informed stop-loss and take-profit levels, based on the calculated range probabilities.
  - The method offers a quantifiable means to assess daily market volatility and potential trading boundaries, aiding in risk management and trading strategy formulation.

This analysis underscores the value of leveraging historical market data and statistical techniques to forecast market behavior, presenting a valuable tool in the arsenal of traders and market analysts.