In [1]:
import os
import glob
import yaml #pip install pandas pyyaml
import pandas as pd
import mysql.connector #pip install mysql-connector-python
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Root folder that holds one sub-folder of YAML files per month.
# NOTE(review): absolute, machine-specific path; consider moving it to a
# config cell or deriving it from an environment variable.
folder_path = r"D:\PROJECT_2\Data-Driven-Stock-Analysis\data"

# Month sub-folders to read, listed in chronological order.
month_name = [
    "2023-10","2023-11","2023-12","2024-01","2024-02","2024-03",
    "2024-04","2024-05","2024-06","2024-07","2024-08","2024-09","2024-10","2024-11"
]

# Every record parsed from the YAML files, accumulated across all months.
records = []

for month in month_name:
    folder = os.path.join(folder_path, month)
    # glob returns matches in arbitrary order; sorting makes the load order
    # (and therefore the row order of `records`) reproducible between runs.
    yaml_files = sorted(glob.glob(os.path.join(folder, "*.yaml")))

    for file_path in yaml_files:
        # Explicit encoding: the default used by open() is platform dependent
        # (e.g. cp1252 on Windows), which can corrupt or reject UTF-8 YAML.
        with open(file_path, "r", encoding="utf-8") as file:
            data = yaml.safe_load(file)

        if data:
            if isinstance(data, dict):
                # A single mapping is one record.
                records.append(data)
            elif isinstance(data, list):
                # A sequence contributes each of its items as a record.
                records.extend(data)
            else:
                # Previously dropped silently; report it so missing rows can be traced.
                print(
                    f"Skipped {os.path.basename(file_path)} from {month}: "
                    f"unexpected top-level type {type(data).__name__}"
                )
                continue
        print(f"Loaded {os.path.basename(file_path)} from {month}")


In [None]:
# Build one DataFrame out of every record gathered from the YAML files.
df1 = pd.DataFrame(records)

# Report how many rows were loaded, then show the first few of them.
total_rows = len(df1)
print(f"\nTotal records loaded: {total_rows}")
preview = df1.head()
print(preview)


In [None]:
# Print pandas' summary of df1 to check the loaded columns before inserting into MySQL.
df1.info()

In [20]:
def get_connection(database=None):
    """Open a new MySQL connection.

    Connection settings are read from the environment variables
    ``MYSQL_HOST``, ``MYSQL_USER`` and ``MYSQL_PASSWORD``; when a variable is
    not set, the value this notebook used originally is the fallback, so
    calling ``get_connection()`` with no environment set behaves as before.

    Parameters
    ----------
    database : str, optional
        Schema to select on connect. When omitted, no default schema is
        selected and the caller has to issue ``USE <schema>`` itself.

    Returns
    -------
    The connection object returned by ``mysql.connector.connect``. The caller
    is responsible for closing it.
    """
    settings = {
        "host": os.environ.get("MYSQL_HOST", "localhost"),
        "user": os.environ.get("MYSQL_USER", "root"),
        # NOTE(review): the literal fallback is kept only for backward
        # compatibility; set MYSQL_PASSWORD and remove it before sharing.
        "password": os.environ.get("MYSQL_PASSWORD", "0007"),
    }
    if database is not None:
        settings["database"] = database
    return mysql.connector.connect(**settings)

In [None]:
# Create the DATA database.
conn = get_connection()
try:
    cursor = conn.cursor()
    try:
        # IF NOT EXISTS keeps this cell re-runnable: a plain CREATE DATABASE
        # raises once the schema already exists (e.g. on Restart & Run All).
        cursor.execute("CREATE DATABASE IF NOT EXISTS DATA")
        print("Database 'DATA' created successfully!")
    finally:
        # Release the cursor even when the statement fails.
        cursor.close()
finally:
    # Release the connection even when the statement fails.
    conn.close()


In [None]:
# Create the MARKET table inside the DATA database (no-op when it already exists).
conn = get_connection()
try:
    cursor = conn.cursor()
    try:
        cursor.execute("USE DATA")

        # One row per ticker per timestamp.
        # NOTE(review): INT caps `volume` at 2,147,483,647 and DECIMAL(10,2)
        # rounds prices to two decimals; confirm both fit the source data.
        cursor.execute("""
    CREATE TABLE IF NOT EXISTS MARKET (
        Ticker VARCHAR(50),
        close DECIMAL(10,2),
        date DATETIME,
        high DECIMAL(10,2),
        low DECIMAL(10,2),
        month VARCHAR(10),
        open DECIMAL(10,2),
        volume INT
    )
""")

        conn.commit()
    finally:
        # Release the cursor even when a statement fails.
        cursor.close()
finally:
    # Release the connection even when a statement fails.
    conn.close()

print("Table 'MARKET' created successfully!")

In [None]:
# Bulk-insert every row of df1 into MARKET.
# NOTE(review): this cell is not idempotent; running it twice inserts every
# row twice because MARKET has no unique key.

# Columns of MARKET, in the order used by the INSERT statement below.
market_columns = ["Ticker", "close", "date", "high", "low", "month", "open", "volume"]

insert_sql = """
        INSERT INTO MARKET (Ticker, close, date, high, low, month,open,volume)
        VALUES (%s, %s, %s, %s, %s,%s, %s,%s)
    """

# Rows sent per executemany() call; keeps each statement at a bounded size.
batch_size = 1000

# reindex() adds any column missing from df1 as all-NaN, and astype(object)
# hands back plain Python / pandas scalars per cell. Missing values (NaN/NaT)
# are mapped to None so the driver writes SQL NULL instead of failing on NaN.
market_rows = [
    tuple(None if pd.isna(value) else value for value in record)
    for record in df1.reindex(columns=market_columns)
    .astype(object)
    .itertuples(index=False, name=None)
]

conn = get_connection()
try:
    cursor = conn.cursor()
    try:
        cursor.execute("USE DATA")
        # executemany() sends the rows in batches instead of one round trip
        # per row, which is what the previous iterrows() loop did.
        for start in range(0, len(market_rows), batch_size):
            cursor.executemany(insert_sql, market_rows[start:start + batch_size])
        conn.commit()
    except Exception:
        # Do not leave a half-loaded table behind when a batch fails.
        conn.rollback()
        raise
    finally:
        cursor.close()
finally:
    conn.close()

print("Data inserted successfully into 'MARKET'!")


In [45]:
# Export MARKET to one CSV file per ticker.

# Folder that receives the CSV files; created on first run.
output_folder = os.path.join(folder_path, "output_csv")
os.makedirs(output_folder, exist_ok=True)

# Column order shared by the SELECT below and the exported CSV header.
export_columns = ["Ticker", "close", "date", "high", "low", "month", "open", "volume"]

conn = get_connection()
try:
    cursor = conn.cursor()
    try:
        cursor.execute("USE DATA")

        # Step 1: Get distinct tickers
        cursor.execute("SELECT DISTINCT Ticker FROM MARKET")
        companies = [row[0] for row in cursor.fetchall()]  # flatten 1-tuples to a list
        print("Tickers found:", companies)

        # Step 2: Loop through each ticker, fetch its rows, and save as CSV
        for ticker in companies:
            print(f"Processing {ticker}...")

            # ORDER BY makes the row order of each CSV deterministic; without
            # it the server may return rows in any order.
            cursor.execute("""
        SELECT Ticker, close, date, high, low, month, open,volume
        FROM MARKET
        WHERE Ticker = %s
        ORDER BY date
    """, (ticker,))
            rows = cursor.fetchall()

            # Convert to DataFrame
            df = pd.DataFrame(rows, columns=export_columns)

            # Step 3: Save to CSV (file name is the ticker symbol)
            csv_path = os.path.join(output_folder, f"{ticker}.csv")
            df.to_csv(csv_path, index=False)

            print(f"Saved: {csv_path}")
    finally:
        # Release the cursor even when a query or a file write fails.
        cursor.close()
finally:
    # Release the connection even when a query or a file write fails.
    conn.close()

print("✅ All ticker data exported to CSV successfully!")


Tickers found: ['SBIN', 'BAJFINANCE', 'TITAN', 'ITC', 'TCS', 'LT', 'TATACONSUM', 'RELIANCE', 'HCLTECH', 'JSWSTEEL', 'ULTRACEMCO', 'POWERGRID', 'INFY', 'TRENT', 'BHARTIARTL', 'TATAMOTORS', 'WIPRO', 'TECHM', 'NTPC', 'HINDUNILVR', 'APOLLOHOSP', 'M&M', 'GRASIM', 'ICICIBANK', 'ADANIENT', 'ADANIPORTS', 'BEL', 'BAJAJFINSV', 'EICHERMOT', 'COALINDIA', 'MARUTI', 'INDUSINDBK', 'ASIANPAINT', 'TATASTEEL', 'HDFCLIFE', 'DRREDDY', 'SUNPHARMA', 'KOTAKBANK', 'SHRIRAMFIN', 'NESTLEIND', 'ONGC', 'CIPLA', 'BPCL', 'BRITANNIA', 'SBILIFE', 'HINDALCO', 'HEROMOTOCO', 'AXISBANK', 'HDFCBANK', 'BAJAJ-AUTO']
Processing SBIN...
Saved: D:\PROJECT_2\Data-Driven-Stock-Analysis\data\output_csv\SBIN.csv
Processing BAJFINANCE...
Saved: D:\PROJECT_2\Data-Driven-Stock-Analysis\data\output_csv\BAJFINANCE.csv
Processing TITAN...
Saved: D:\PROJECT_2\Data-Driven-Stock-Analysis\data\output_csv\TITAN.csv
Processing ITC...
Saved: D:\PROJECT_2\Data-Driven-Stock-Analysis\data\output_csv\ITC.csv
Processing TCS...
Saved: D:\PROJECT_2\D