In [33]:
import os
import pandas as pd

def convert_and_save_to_excel(folder_path, filename):
    # Construct the full path to the text file
    txt_file_path = os.path.join(folder_path, filename)

    # Find the line number where the data starts
    skip_rows = 0
    header_line = ''
    with open(txt_file_path, 'r') as file:
        for i, line in enumerate(file):
            if "Accuracy and completeness are not guaranteed." in line:
                skip_rows = i + 2  # Skip the next two lines (header and empty line)
                header_line = next(file).strip()
                break

    # Split the existing header into new headers
    new_headers = header_line.split()

    # Read the .txt file into a pandas DataFrame, skipping the specified lines
    df = pd.read_csv(txt_file_path, delimiter='\t', skiprows=skip_rows, header=None)  # Adjust the delimiter if needed

    # Fill missing headers with "NA"
    while len(new_headers) < len(df.columns):
        new_headers.append("NA")

    # Assign the new headers to the DataFrame
    df.columns = new_headers

    # Split all rows using the same logic as the header
    for col in df.columns:
        df[col] = df[col].apply(lambda x: x.split() if isinstance(x, str) else ["NA"])

    # Construct the full path to the Excel file
    excel_file_path = os.path.join(folder_path, f'{os.path.splitext(filename)[0]}_with_new_headers_and_split_rows.xlsx')

    # Write the DataFrame to an Excel file
    df.to_excel(excel_file_path, index=False)

    print(f'Successfully converted and modified {txt_file_path} to {excel_file_path}')

    # Import the Excel file into Python
    imported_df = pd.read_excel(excel_file_path)

    # Perform any additional operations with the imported DataFrame here

    # Delete the Excel file
    os.remove(excel_file_path)

    print(f'Successfully imported and deleted Excel file: {excel_file_path}')

    # Create a new DataFrame to store the extracted text values for each row
    extracted_rows = {}

    # Iterate through each row in the converted DataFrame
    for index, row in df.iterrows():
        # Extract text values from the current row and store them in a Series
        row_values = pd.Series(row.explode().tolist())

        # Save the Series to the extracted_rows dictionary
        row_name = f'row{index}'
        extracted_rows[row_name] = row_values

    # Merge all extracted rows into a single DataFrame
    merged_rows_df = pd.DataFrame(extracted_rows)

    # Transpose the DataFrame
    transposed_merged_rows_df = merged_rows_df.T

    # Use the first row as headers in the transposed DataFrame
    transposed_merged_rows_df.columns = transposed_merged_rows_df.iloc[0]

    # Drop the first row (since it's now the header)
    transposed_merged_rows_df = transposed_merged_rows_df[1:]

    # Convert all values to strings before using .str accessor
    transposed_merged_rows_df = transposed_merged_rows_df.applymap(str)

    # Remove specified characters from all cells in the DataFrame
    transposed_merged_rows_df = transposed_merged_rows_df.applymap(lambda x: x.replace("'", "").replace(",", "").replace("]", "").replace("[", ""))

    transposed_merged_rows_df.columns = transposed_merged_rows_df.columns.str.replace(r"['\[\],]", '', regex=True)

    # Remove rows where the column 'Symb' contains the text 'Symb'
    transposed_merged_rows_df = transposed_merged_rows_df[~transposed_merged_rows_df['Symb'].astype(str).str.contains('Symb', case=False, na=False)]

    # Construct the full path to the second Excel file
    excel_output_path = os.path.join(folder_path, f'{os.path.splitext(filename)[0]}_final_output.xlsx')

    # Write the DataFrame to the second Excel file
    transposed_merged_rows_df.to_excel(excel_output_path, index=False)

    print(f'Successfully created final output Excel file: {excel_output_path}')

# Specify the path to your folder
folder_path = r'C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion'

# List of filenames
filenames = [
    'cale_20240116 (Calendar Best).txt',
    'calp_20240116 (Calendar Highest).txt',
    'cw_aec_20240116 (Covered Annual Ex Cash).txt',
    'cw_aem_20240116 (Covered Annual Margin).txt',
    'cw_dbc_20240116 (Covered Protection Cash).txt',
    'cw_dbm_20240116 (Covered Protection Margin).txt',
    'np_aem_20240116 (NP Annual Expected).txt',
    'np_dbc_20240116 (NP Best Expected).txt',
]

# Iterate through each filename and convert the file
for filename in filenames:
    convert_and_save_to_excel(folder_path, filename)


Successfully converted and modified C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cale_20240116 (Calendar Best).txt to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cale_20240116 (Calendar Best)_with_new_headers_and_split_rows.xlsx
Successfully imported and deleted Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cale_20240116 (Calendar Best)_with_new_headers_and_split_rows.xlsx


  transposed_merged_rows_df = transposed_merged_rows_df.applymap(str)
  transposed_merged_rows_df = transposed_merged_rows_df.applymap(lambda x: x.replace("'", "").replace(",", "").replace("]", "").replace("[", ""))


Successfully created final output Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cale_20240116 (Calendar Best)_final_output.xlsx
Successfully converted and modified C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\calp_20240116 (Calendar Highest).txt to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\calp_20240116 (Calendar Highest)_with_new_headers_and_split_rows.xlsx
Successfully imported and deleted Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\calp_20240116 (Calendar Highest)_with_new_headers_and_split_rows.xlsx


  transposed_merged_rows_df = transposed_merged_rows_df.applymap(str)
  transposed_merged_rows_df = transposed_merged_rows_df.applymap(lambda x: x.replace("'", "").replace(",", "").replace("]", "").replace("[", ""))


Successfully created final output Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\calp_20240116 (Calendar Highest)_final_output.xlsx
Successfully converted and modified C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_aec_20240116 (Covered Annual Ex Cash).txt to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_aec_20240116 (Covered Annual Ex Cash)_with_new_headers_and_split_rows.xlsx
Successfully imported and deleted Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_aec_20240116 (Covered Annual Ex Cash)_with_new_headers_and_split_rows.xlsx


  transposed_merged_rows_df = transposed_merged_rows_df.applymap(str)
  transposed_merged_rows_df = transposed_merged_rows_df.applymap(lambda x: x.replace("'", "").replace(",", "").replace("]", "").replace("[", ""))


Successfully created final output Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_aec_20240116 (Covered Annual Ex Cash)_final_output.xlsx
Successfully converted and modified C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_aem_20240116 (Covered Annual Margin).txt to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_aem_20240116 (Covered Annual Margin)_with_new_headers_and_split_rows.xlsx
Successfully imported and deleted Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_aem_20240116 (Covered Annual Margin)_with_new_headers_and_split_rows.xlsx


  transposed_merged_rows_df = transposed_merged_rows_df.applymap(str)
  transposed_merged_rows_df = transposed_merged_rows_df.applymap(lambda x: x.replace("'", "").replace(",", "").replace("]", "").replace("[", ""))


Successfully created final output Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_aem_20240116 (Covered Annual Margin)_final_output.xlsx
Successfully converted and modified C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_dbc_20240116 (Covered Protection Cash).txt to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_dbc_20240116 (Covered Protection Cash)_with_new_headers_and_split_rows.xlsx
Successfully imported and deleted Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_dbc_20240116 (Covered Protection Cash)_with_new_headers_and_split_rows.xlsx


  transposed_merged_rows_df = transposed_merged_rows_df.applymap(str)
  transposed_merged_rows_df = transposed_merged_rows_df.applymap(lambda x: x.replace("'", "").replace(",", "").replace("]", "").replace("[", ""))


Successfully created final output Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_dbc_20240116 (Covered Protection Cash)_final_output.xlsx
Successfully converted and modified C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_dbm_20240116 (Covered Protection Margin).txt to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_dbm_20240116 (Covered Protection Margin)_with_new_headers_and_split_rows.xlsx
Successfully imported and deleted Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_dbm_20240116 (Covered Protection Margin)_with_new_headers_and_split_rows.xlsx


  transposed_merged_rows_df = transposed_merged_rows_df.applymap(str)
  transposed_merged_rows_df = transposed_merged_rows_df.applymap(lambda x: x.replace("'", "").replace(",", "").replace("]", "").replace("[", ""))


Successfully created final output Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\cw_dbm_20240116 (Covered Protection Margin)_final_output.xlsx
Successfully converted and modified C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\np_aem_20240116 (NP Annual Expected).txt to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\np_aem_20240116 (NP Annual Expected)_with_new_headers_and_split_rows.xlsx
Successfully imported and deleted Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\np_aem_20240116 (NP Annual Expected)_with_new_headers_and_split_rows.xlsx


  transposed_merged_rows_df = transposed_merged_rows_df.applymap(str)
  transposed_merged_rows_df = transposed_merged_rows_df.applymap(lambda x: x.replace("'", "").replace(",", "").replace("]", "").replace("[", ""))


Successfully created final output Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\np_aem_20240116 (NP Annual Expected)_final_output.xlsx
Successfully converted and modified C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\np_dbc_20240116 (NP Best Expected).txt to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\np_dbc_20240116 (NP Best Expected)_with_new_headers_and_split_rows.xlsx
Successfully imported and deleted Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\np_dbc_20240116 (NP Best Expected)_with_new_headers_and_split_rows.xlsx


  transposed_merged_rows_df = transposed_merged_rows_df.applymap(str)
  transposed_merged_rows_df = transposed_merged_rows_df.applymap(lambda x: x.replace("'", "").replace(",", "").replace("]", "").replace("[", ""))


Successfully created final output Excel file: C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\np_dbc_20240116 (NP Best Expected)_final_output.xlsx


In [80]:
import pandas as pd

# Replace 'str_20240116 (Straddle Trad).txt' with the actual file name
txt_file_path = r'C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\str_20240116 (Straddle Trad).txt'

# Read the tab-separated text file into a pandas DataFrame, skipping rows until '20240116' is found
with open(txt_file_path, 'r') as file:
    lines = file.readlines()

# Find the index of the line containing '20240116'
start_index = next(i for i, line in enumerate(lines) if '20240116' in line)

# Read the text file starting from the line with '20240116' into a pandas DataFrame
df = pd.read_csv(txt_file_path, delimiter='\t', skiprows=range(1, start_index))

# Replace 'output_file.xlsx' with the desired Excel file name
excel_file_path = r'C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\output_file.xlsx'




import pandas as pd
import re

# Assuming your DataFrame is df_imported
# Join the first row into a single string and then split on one or more spaces
text_list = re.split('\s+', ' '.join(df_imported.iloc[1]))

# Create a new DataFrame with each text from the first row as individual rows
df_imported_row1 = pd.DataFrame({'Text': text_list})





import pandas as pd
import re

# Assuming your DataFrame is df_imported
# Initialize an empty list to store DataFrames for each row
dfs_list = []

# Iterate through each row in the DataFrame, starting from the second row
for index, row in df_imported.iloc[1:].iterrows():
    # Join the row into a single string and then split on one or more spaces
    text_list = re.split('\s+', ' '.join(row))
    
    # Create a new DataFrame with each text from the row as individual rows
    df_row = pd.DataFrame({'Text': text_list})
    
    # Append the DataFrame to the list
    dfs_list.append(df_row)

# Concatenate all DataFrames in the list into a single DataFrame side by side
final_df = pd.concat(dfs_list, axis=1)

# Print the final DataFrame
print("Final DataFrame:")

# Assuming final_df is the DataFrame you want to transpose
transposed_df = final_df.transpose()

# Alternatively, you can use the .T attribute
# transposed_df = final_df.T

# Assuming transposed_df is the DataFrame you want to work with
transposed_df.columns = transposed_df.iloc[0]

# Drop the first row (which is now the header)
transposed_df = transposed_df[1:]

# Reset the index for a cleaner DataFrame
transposed_df.reset_index(drop=True, inplace=True)

# Assuming transposed_df is the DataFrame you want to write to Excel
excel_output_path = r'C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\str_20240116 (Straddle Trad).xlsx'

# Write the transposed DataFrame to an Excel file
transposed_df.to_excel(excel_output_path, index=False)

print(f"Transposed DataFrame has been written to {excel_output_path}.")



Final DataFrame:
Transposed DataFrame has been written to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\str_20240116 (Straddle Trad).xlsx.


In [81]:
import pandas as pd

# Replace 'xtr_20240116 (Straddle New).txt' with the actual file name
txt_file_path = r'C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\xtr_20240116 (Straddle New).txt'

# Read the tab-separated text file into a pandas DataFrame, skipping rows until '20240116' is found
with open(txt_file_path, 'r') as file:
    lines = file.readlines()

# Find the index of the line containing '20240116'
start_index = next(i for i, line in enumerate(lines) if 'Stradnew:  Comparing 20240112 .vs. 20240116' in line)

# Read the text file starting from the line with '20240116' into a pandas DataFrame
df = pd.read_csv(txt_file_path, delimiter='\t', skiprows=range(1, start_index))

# Replace 'output_file.xlsx' with the desired Excel file name
excel_file_path = r'C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\output_file.xlsx'

import pandas as pd
import re

# Assuming your DataFrame is df_imported
# Join the first row into a single string and then split on one or more spaces
text_list = re.split('\s+', ' '.join(df_imported.iloc[1]))

# Create a new DataFrame with each text from the first row as individual rows
df_imported_row1 = pd.DataFrame({'Text': text_list})

import pandas as pd
import re

# Assuming your DataFrame is df_imported
# Initialize an empty list to store DataFrames for each row
dfs_list = []

# Iterate through each row in the DataFrame, starting from the second row
for index, row in df_imported.iloc[1:].iterrows():
    # Join the row into a single string and then split on one or more spaces
    text_list = re.split('\s+', ' '.join(row))
    
    # Create a new DataFrame with each text from the row as individual rows
    df_row = pd.DataFrame({'Text': text_list})
    
    # Append the DataFrame to the list
    dfs_list.append(df_row)

# Concatenate all DataFrames in the list into a single DataFrame side by side
final_df = pd.concat(dfs_list, axis=1)

# Print the final DataFrame
print("Final DataFrame:")

# Assuming final_df is the DataFrame you want to transpose
transposed_df = final_df.transpose()

# Alternatively, you can use the .T attribute
# transposed_df = final_df.T

# Assuming transposed_df is the DataFrame you want to work with
transposed_df.columns = transposed_df.iloc[0]

# Drop the first row (which is now the header)
transposed_df = transposed_df[1:]

# Reset the index for a cleaner DataFrame
transposed_df.reset_index(drop=True, inplace=True)

# Assuming transposed_df is the DataFrame you want to write to Excel
excel_output_path = r'C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\xtr_20240116 (Straddle New).xlsx'

# Write the transposed DataFrame to an Excel file
transposed_df.to_excel(excel_output_path, index=False)

print(f"Transposed DataFrame has been written to {excel_output_path}.")



Final DataFrame:
Transposed DataFrame has been written to C:\Users\neste\3D Objects\Gitrepos\robertnesterodhiambo-Data-analysis\file conversion\xtr_20240116 (Straddle New).xlsx.
