In [1]:
import os
import pandas as pd
import sqlite3

In [3]:
# Folder that the loader cells scan for raw '.txt' files.
directory_path = 'wse stocks/'

# Column names assigned to every loaded file; a file is accepted only when
# its column count equals the length of this list.
columns = [
    "TICKER", "PER", "DATE", "TIME", "OPEN",
    "HIGH", "LOW", "CLOSE", "VOL", "OPENINT",
]

# Accumulator of per-file DataFrames that the loader cells append to.
all_dfs = list()

In [7]:
# Build data/financial_data.db (table `stock_data`) from the raw .txt files.
#
# `all_dfs` is shared by several cells. Reset it here so that re-running this
# cell, or running it after another loader cell, does not append the same
# files a second time and write duplicated rows.
all_dfs = []

# Sorted so the row order of the output does not depend on the arbitrary
# order in which the OS lists the directory.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.txt'):
        continue
    file_path = os.path.join(directory_path, filename)
    try:
        temp_df = pd.read_csv(file_path, delimiter=',')
    except Exception as e:
        # Still best-effort (one bad file must not abort the load), but report
        # the skipped file instead of dropping it silently.
        print(f'File {file_path} could not be read: {e}')
        continue
    if temp_df.shape[1] == len(columns):
        # Replace whatever header the file carried with the canonical names.
        temp_df.columns = columns
        all_dfs.append(temp_df)
    else:
        print(f'File {file_path} does not match the expected number of columns.')

if all_dfs:
    combined_df = pd.concat(all_dfs, ignore_index=True)
    # DATE arrives as YYYYMMDD; store it as ISO 'YYYY-MM-DD' text.
    combined_df['DATE'] = pd.to_datetime(combined_df['DATE'], format='%Y%m%d').dt.strftime('%Y-%m-%d')
    sqlite_db_path = 'data/financial_data.db'
    # sqlite3.connect() does not create missing parent directories.
    os.makedirs(os.path.dirname(sqlite_db_path), exist_ok=True)
    conn = sqlite3.connect(sqlite_db_path)
    table_name = 'stock_data'
    try:
        combined_df.to_sql(table_name, conn, if_exists='replace', index=False)
    finally:
        # Release the database file even if the write fails.
        conn.close()
else:
    # pd.concat raises ValueError on an empty list, so report and stop instead.
    print('No valid data to combine.')

In [8]:
# Build data/combined_data.csv from the raw .txt files.
#
# `all_dfs` is shared by several cells. Reset it here so that re-running this
# cell, or running it after another loader cell, does not append the same
# files a second time and write duplicated rows to the CSV.
all_dfs = []

# Sorted so the row order of the output does not depend on the arbitrary
# order in which the OS lists the directory.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.txt'):
        continue
    file_path = os.path.join(directory_path, filename)
    try:
        temp_df = pd.read_csv(file_path, delimiter=',')
    except Exception as e:
        # Still best-effort (one bad file must not abort the load), but report
        # the skipped file instead of dropping it silently.
        print(f'File {file_path} could not be read: {e}')
        continue
    if temp_df.shape[1] == len(columns):
        # Replace whatever header the file carried with the canonical names.
        temp_df.columns = columns
        all_dfs.append(temp_df)
    else:
        print(f'File {file_path} does not match the expected number of columns.')

if all_dfs:
    combined_df = pd.concat(all_dfs, ignore_index=True)
    # DATE arrives as YYYYMMDD; write it as ISO 'YYYY-MM-DD' text.
    combined_df['DATE'] = pd.to_datetime(combined_df['DATE'], format='%Y%m%d').dt.strftime('%Y-%m-%d')
    output_file_path = 'data/combined_data.csv'
    # to_csv() does not create missing parent directories.
    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
    combined_df.to_csv(output_file_path, index=False)
else:
    # pd.concat raises ValueError on an empty list, so report and stop instead.
    print('No valid data to combine.')

In [4]:
# Save data to the database and aggregate it monthly / quarterly.
#
# Steps: load every '.txt' file, write the combined rows to table `stock_data`,
# materialise per-ticker monthly averages in table `monthly_data`, then export
# per-ticker quarterly averages to data/quarterly_data.csv.
#
# `all_dfs` is shared by several cells. Reset it here so that re-running this
# cell, or running it after another loader cell, does not append the same
# files a second time and skew the stored rows.
all_dfs = []

# Sorted so the row order of `stock_data` does not depend on the arbitrary
# order in which the OS lists the directory.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.txt'):
        continue
    file_path = os.path.join(directory_path, filename)
    try:
        # Zero-byte files cannot be parsed; skip them quietly.
        if os.path.getsize(file_path) == 0:
            continue
        temp_df = pd.read_csv(file_path, delimiter=',')
    except Exception as e:
        # Still best-effort (one bad file must not abort the load), but report
        # the skipped file instead of dropping it silently.
        print(f'File {file_path} could not be read: {e}')
        continue
    if temp_df.shape[1] == len(columns):
        # Replace whatever header the file carried with the canonical names.
        temp_df.columns = columns
        all_dfs.append(temp_df)
    else:
        print(f'File {file_path} does not match the expected number of columns.')

if all_dfs:
    combined_df = pd.concat(all_dfs, ignore_index=True)
    # DATE is kept as a datetime column here (not formatted to text);
    # NOTE(review): the cell that only creates financial_data.db stores DATE
    # as 'YYYY-MM-DD' text in the same table - confirm which form is wanted.
    combined_df['DATE'] = pd.to_datetime(combined_df['DATE'], format='%Y%m%d')
    sqlite_db_path = 'data/financial_data.db'
    # sqlite3.connect() does not create missing parent directories.
    os.makedirs(os.path.dirname(sqlite_db_path), exist_ok=True)
    conn = sqlite3.connect(sqlite_db_path)
    try:
        table_name = 'stock_data'
        combined_df.to_sql(table_name, conn, if_exists='replace', index=False)
        cursor = conn.cursor()

        # SQL query aggregating the data per ticker and calendar month.
        query_monthly = """
    SELECT
        TICKER,
        strftime('%Y-%m', DATE) as MONTH,
        AVG(OPEN) as AVG_OPEN,
        AVG(CLOSE) as AVG_CLOSE
    FROM stock_data
    GROUP BY TICKER, strftime('%Y-%m', DATE)
    """

        # Rebuild monthly_data from scratch on every run.
        cursor.execute("DROP TABLE IF EXISTS monthly_data;")
        cursor.execute(f"CREATE TABLE monthly_data AS {query_monthly};")
        # Make sure the rebuilt table is persisted before the connection closes.
        conn.commit()

        # SQL query aggregating the monthly rows per ticker, year and quarter.
        # MONTH is 'YYYY-MM': characters 1-4 are the year, 6-7 the month, and
        # integer division maps months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
        # NOTE: the quarterly values are the mean of the monthly means, so
        # every month weighs equally regardless of how many rows it has.
        query_quarterly = """
    SELECT
        TICKER,
        substr(MONTH, 1, 4) as YEAR,
        ((cast(substr(MONTH, 6, 2) as integer) - 1) / 3 + 1) as QUARTER,
        AVG(AVG_OPEN) as AVG_QUARTER_OPEN,
        AVG(AVG_CLOSE) as AVG_QUARTER_CLOSE
    FROM monthly_data
    GROUP BY TICKER, substr(MONTH, 1, 4), ((cast(substr(MONTH, 6, 2) as integer) - 1) / 3 + 1)
    ORDER BY TICKER, YEAR, QUARTER
    """

        quarterly_df = pd.read_sql_query(query_quarterly, conn)

        output_file_path = 'data/quarterly_data.csv'
        quarterly_df.to_csv(output_file_path, index=False)

        print(f'Quarterly data saved to {output_file_path}')
    finally:
        # Release the database file even if one of the steps above fails.
        conn.close()
else:
    print('No valid data to combine.')


Quarterly data saved to data/quarterly_data.csv
