The code below merges every .txt file in the stock indices folder into a single CSV file.
The folder and output file paths will need to be adjusted for individual use.

In [None]:
import os
import pandas as pd 

# Define the folder path containing the .txt files and the output CSV file.
# Both are machine-specific absolute paths (raw strings, so the backslashes are literal).
FOLDER_PATH = r"C:\Users\kjbsh\Documents\d_world_txt\data\daily\world\stooq stocks indices"  # Change this to your folder location
OUTPUT_FILE = r"c:\Users\kjbsh\Documents\stockData.csv"  # Change the output file name if needed

def merge_txt_to_csv(folder_path, output_file):
    """Merge every ``.txt`` file in *folder_path* into one CSV at *output_file*.

    Each file is parsed as comma-delimited text whose first line is the
    header. A ``Source`` column holding the originating file name is added to
    every row before the frames are concatenated.

    Files are processed in sorted name order so repeated runs produce the same
    row order (``os.listdir`` makes no ordering guarantee), and the extension
    check is case-insensitive so ``.TXT`` files are not skipped.

    Args:
        folder_path: Directory to scan for text files (not recursive).
        output_file: Path of the merged CSV to write.

    Returns:
        None. Status and per-file read errors are reported with ``print``;
        no CSV is written when no file could be read.
    """
    all_files = sorted(
        f for f in os.listdir(folder_path) if f.lower().endswith('.txt')
    )

    if not all_files:
        print("No text files found in the folder.")
        return

    merged_data = []
    for file in all_files:
        file_path = os.path.join(folder_path, file)
        try:
            # Read the file into a DataFrame
            df = pd.read_csv(file_path, delimiter=',', header=0)  # Adjust delimiter if needed
        except Exception as e:
            # Deliberately best-effort: one unreadable or empty file must not
            # abort the whole merge, so report it and move on.
            print(f"Error reading {file}: {e}")
        else:
            df['Source'] = file  # Add a column to track source file name
            merged_data.append(df)

    if merged_data:
        final_df = pd.concat(merged_data, ignore_index=True)
        final_df.to_csv(output_file, index=False)
        print(f"Merged CSV saved as: {output_file}")
    else:
        print("No valid data to merge.")

# Run the function on the configured folder; it prints the output path on success
merge_txt_to_csv(FOLDER_PATH, OUTPUT_FILE)


Merged CSV saved as: c:\Users\kjbsh\Documents\stockData.csv


Below is code that takes the raw econ data and filters out all the countries for which we do not have matching stock market data.
The stock indices used in our data set: 
_^de → Germany (likely DAX or XETRA)
_^hk → Hong Kong (Hang Seng Index - HSI)
_^hu → Hungary (Budapest Stock Exchange - BUX)
_^jp → Japan (Nikkei 225 or TOPIX)
_^pl → Poland (Warsaw Stock Exchange - WIG20)
_^pl20 → Likely WIG20 (Warsaw Stock Exchange)
_^plnc → Possibly mWIG40 or sWIG80, but unclear
_^plws → Could be related to Polish stock market indices
_^uk → United Kingdom (FTSE 100)
_^us → United States (S&P 500, Dow Jones, NASDAQ)
_^usnm → Possibly NASDAQ Mid Cap stocks
_^usnq → Likely NASDAQ Composite
_^usns → Possibly NASDAQ Small Cap stocks

So all other countries in the econ csv that are not in this list should be deleted

The column names, and seemingly all of the econ data values, are formatted in a weird way with leading and trailing whitespace, so we must strip that first.

In [11]:
import pandas as pd


# 1. Read in the econ data

df = pd.read_csv(r"c:\Users\kjbsh\Documents\archive1\Global Economy Indicators.csv")

# The headers and country names are stripped of surrounding whitespace
# so that the exact-match filter in step 3 can work.
# Strip whitespace from column names
df.columns = df.columns.str.strip()
# Strip whitespace from Country names
df["Country"] = df["Country"].str.strip()
# Print column names to verify
print(df.columns)

# 2. Define the list of valid countries 
valid_countries = [
    "Germany",                  # ^de
    "China, Hong Kong SAR",     # ^hk
    "Hungary",                  # ^hu
    "Japan",                    # ^jp
    "Poland",                   # ^pl, ^pl20, ^plnc, ^plws
    "United Kingdom",           # ^uk
    "United States"             # ^us, ^usnm, ^usnq, ^usns
]

# isin() is an exact string match, so a country spelled differently in the
# data would be dropped without any error. Report such names explicitly.
known_countries = set(df["Country"])
missing_countries = [c for c in valid_countries if c not in known_countries]
if missing_countries:
    print(f"Warning: no rows found for: {missing_countries}")

# 3. Filter the DataFrame so only rows with these countries remain
# .copy() makes df_filtered independent of df, so later edits to it do not
# raise chained-assignment warnings.
df_filtered = df[df["Country"].isin(valid_countries)].copy()

# 4. Write filtered data to a new CSV
df_filtered.to_csv(r"c:\Users\kjbsh\Documents\econDataFilteredCountries.csv", index=False)

print("Filtered CSV saved as econDataFilteredCountries.csv")



Index(['CountryID', 'Country', 'Year', 'AMA exchange rate',
       'IMF based exchange rate', 'Population', 'Currency', 'Per capita GNI',
       'Agriculture, hunting, forestry, fishing (ISIC A-B)',
       'Changes in inventories', 'Construction (ISIC F)',
       'Exports of goods and services', 'Final consumption expenditure',
       'General government final consumption expenditure',
       'Gross capital formation',
       'Gross fixed capital formation (including Acquisitions less disposals of valuables)',
       'Household consumption expenditure (including Non-profit institutions serving households)',
       'Imports of goods and services', 'Manufacturing (ISIC D)',
       'Mining, Manufacturing, Utilities (ISIC C-E)',
       'Other Activities (ISIC J-P)', 'Total Value Added',
       'Transport, storage and communication (ISIC I)',
       'Wholesale, retail trade, restaurants and hotels (ISIC G-H)',
       'Gross National Income(GNI) in USD', 'Gross Domestic Product (GDP)'],
    