In [1]:
import yfinance as yf
import pandas as pd
import pandas_ta as ta
import numpy as np

In [11]:
def download_historic_data(ticker, start="2000-01-01", end="2024-12-30"):
    """Download daily price history for ``ticker`` and save it as a CSV file.

    The data is fetched with ``yf.download`` and written to
    ``raw data/{ticker}_data.csv`` (relative to the current directory) with a
    single header row and lower-case ``date``/``open``/``high``/``low``/
    ``close``/``volume`` column names.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download``, e.g. ``"MRF.NS"``.
    start : str, optional
        Start date as ``YYYY-MM-DD``. Defaults to ``"2000-01-01"``.
    end : str or None, optional
        End date as ``YYYY-MM-DD``. Defaults to ``"2024-12-30"``; pass
        ``None`` to use today's date.

    Raises
    ------
    ValueError
        If the download returns no rows.
    """
    import os
    from datetime import datetime

    if end is None:
        end = datetime.now().strftime("%Y-%m-%d")

    data = yf.download(ticker, start=start, end=end)
    if data.empty:
        # Do not write an empty file and report success.
        raise ValueError(f"No data returned for {ticker} between {start} and {end}")

    # If the columns are two-level (presumably field/ticker in recent yfinance
    # versions), to_csv would emit a second header row. Keeping only the first
    # level here gives a single header row without post-editing the CSV, and
    # never removes a data row when the columns are already flat.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    data = data.reset_index()
    data = data.rename(columns={
        'Date': 'date',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume'
    })
    data['date'] = pd.to_datetime(data['date'])

    # The output folder may not exist on a fresh checkout.
    os.makedirs('raw data', exist_ok=True)
    csv_file = f'raw data/{ticker}_data.csv'
    data.to_csv(csv_file, index=False)  # index=False to exclude the default pandas index
    print(f"{ticker} data saved to {csv_file}")
   

def del_sec_row(file_path):
    """Rewrite the CSV at ``file_path`` in place with its second row removed.

    The file is parsed with ``csv.reader``, the row at index 1 is dropped, and
    the remaining rows are written back with ``csv.writer``. A file holding
    zero or one row is written back as parsed.
    """
    import csv

    # Parse the whole file first so it can be reopened for writing afterwards.
    with open(file_path, mode="r", newline="", encoding="utf-8") as src:
        records = [record for record in csv.reader(src)]

    # Everything except index 1; slicing leaves shorter inputs untouched.
    kept = records[:1] + records[2:]

    with open(file_path, mode="w", newline="", encoding="utf-8") as dst:
        out = csv.writer(dst)
        for record in kept:
            out.writerow(record)
    
    
    

In [15]:
# Fetch the raw price history for MRF.NS and save it as a CSV under "raw data/".
download_historic_data("MRF.NS")

[*********************100%***********************]  1 of 1 completed

TCS data saved to raw data/MRF.NS_data.csv





In [7]:
def technical_generator(ticker):
    """Build a table of calendar fields and technical indicators for ``ticker``.

    Reads ``raw data/{ticker}_data.csv`` and uses its ``date``, ``high``,
    ``low``, ``close`` and ``volume`` columns. ``close`` is coerced to numeric
    and forward-filled before any indicator is computed, so every feature is
    derived from the same cleaned series.

    Parameters
    ----------
    ticker : str
        Symbol used to locate the raw CSV file.

    Returns
    -------
    pandas.DataFrame
        The input rows with ``date`` as a regular column plus the calendar,
        moving-average, oscillator, lag, return, MACD, RSI, VWAP, Bollinger,
        OBV, PSAR, ATR, Ichimoku and CCI columns. Leading rows of the rolling
        indicators are NaN; no rows are dropped.
    """
    file_path = f'raw data/{ticker}_data.csv'
    data = pd.read_csv(file_path)

    data['date'] = pd.to_datetime(data['date'])

    # Clean 'close' once, before anything consumes it.
    data['close'] = pd.to_numeric(data['close'], errors='coerce').ffill()

    # Calendar features
    data['year'] = data['date'].dt.year
    data['month'] = data['date'].dt.month
    data['day'] = data['date'].dt.day
    data['dayofweek'] = data['date'].dt.dayofweek
    data['quarter'] = data['date'].dt.quarter
    data['dayofyear'] = data['date'].dt.dayofyear

    # Simple and exponential moving averages over the same set of windows
    ma_lengths = (5, 8, 13, 21, 50)
    for length in ma_lengths:
        data[f'SMA_{length}'] = ta.sma(data['close'], length=length)
    for length in ma_lengths:
        data[f'EMA_{length}'] = ta.ema(data['close'], length=length)

    # Stochastic Oscillator (%K and %D)
    stoch = ta.stoch(data['high'], data['low'], data['close'], k=14, d=3)
    data['Stoch_%K'] = stoch['STOCHk_14_3_3']
    data['Stoch_%D'] = stoch['STOCHd_14_3_3']

    # Average Directional Index (ADX)
    data['ADX'] = ta.adx(data['high'], data['low'], data['close'], length=14)['ADX_14']

    # Lag features (column-name casing kept as-is for downstream compatibility)
    data['Close_lag1'] = data['close'].shift(1)
    data['close_lag2'] = data['close'].shift(2)

    # Daily returns and log returns
    data['Daily_Return'] = data['close'].pct_change()
    data['Log_Return'] = np.log(data['close'] / data['close'].shift(1))

    # The remaining indicators are computed with 'date' as the index; the
    # original notes VWAP must be calculated after this step (presumably
    # ta.vwap needs a DatetimeIndex -- confirm against the pandas_ta docs).
    data = data.set_index('date')

    # MACD line, signal line and histogram
    macd_result = ta.macd(data['close'], fast=12, slow=26, signal=9)
    data['MACD'] = macd_result['MACD_12_26_9']
    data['MACD_signal'] = macd_result['MACDs_12_26_9']
    data['MACD_hist'] = macd_result['MACDh_12_26_9']

    # Relative Strength Index
    data['RSI'] = ta.rsi(data['close'], length=14)

    # Volume Weighted Average Price
    data['VWAP'] = ta.vwap(high=data['high'], low=data['low'], close=data['close'], volume=data['volume'])

    # Bollinger Bands: BBU is the upper band, BBM the middle, BBL the lower.
    bbands = ta.bbands(data['close'], length=20, std=2)
    data['BB_upper'] = bbands['BBU_20_2.0']
    data['BB_middle'] = bbands['BBM_20_2.0']
    data['BB_lower'] = bbands['BBL_20_2.0']

    # On-Balance Volume
    data['OBV'] = ta.obv(data['close'], data['volume'])

    # Parabolic SAR with acceleration factor 0.02 and maximum 0.2, passed
    # under pandas_ta's own keyword names (af / max_af).
    psar = ta.psar(data['high'], data['low'], data['close'], af=0.02, max_af=0.2)
    # The long and short series are merged into one column: long value where
    # present, otherwise the short value.
    data['PSAR'] = psar['PSARl_0.02_0.2'].combine_first(psar['PSARs_0.02_0.2'])

    # Average True Range
    data['ATR'] = ta.atr(data['high'], data['low'], data['close'], length=14)

    # Ichimoku Cloud; ta.ichimoku returns a tuple and the first element holds
    # the line columns used here.
    ichimoku = ta.ichimoku(data['high'], data['low'], data['close'], tenkan=9, kijun=26, senkou=52)
    ichimoku_lines = ichimoku[0]
    data['Tenkan-sen'] = ichimoku_lines['ITS_9']  # Conversion Line
    data['Kijun-sen'] = ichimoku_lines['IKS_26']  # Base Line
    # NOTE(review): the lagging span is presumably close shifted back 26
    # periods, i.e. built from future closes -- confirm before using it as a
    # model input.
    data['Chikou Span'] = ichimoku_lines['ICS_26']  # Lagging Span

    # Commodity Channel Index
    data['CCI'] = ta.cci(data['high'], data['low'], data['close'], length=20)

    # Return 'date' as a regular column again.
    return data.reset_index()

        

In [9]:
# Build the indicator table for TCS.NS from "raw data/TCS.NS_data.csv".
# NOTE(review): the only download cell visible above fetches MRF.NS, so this
# relies on a TCS.NS CSV produced elsewhere -- confirm it exists on a fresh run.
data=technical_generator("TCS.NS")

In [15]:
# Summarise the indicator table: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5555 entries, 0 to 5554
Data columns (total 43 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          5555 non-null   datetime64[ns]
 1   close         5555 non-null   float64       
 2   high          5555 non-null   float64       
 3   low           5555 non-null   float64       
 4   open          5555 non-null   float64       
 5   volume        5555 non-null   int64         
 6   year          5555 non-null   int32         
 7   month         5555 non-null   int32         
 8   day           5555 non-null   int32         
 9   dayofweek     5555 non-null   int32         
 10  quarter       5555 non-null   int32         
 11  dayofyear     5555 non-null   int32         
 12  SMA_5         5551 non-null   float64       
 13  SMA_8         5548 non-null   float64       
 14  SMA_13        5543 non-null   float64       
 15  SMA_21        5535 non-null   float64 