# In [None]:
# recycling data
import os
import glob
import pandas as pd

# Directory containing the tab-delimited input files
input_directory = r"C:\Users\Desktop\RE-folder\notepads"

# Output directory for the per-state CSV files
output_directory = r"C:\Users\Desktop\RE-folder\data"

# Columns carried from each input file into the output.  'state' must be
# kept here because the export below splits the combined data on it.
OUTPUT_COLUMNS = ['phone_number', 'postal_code', 'state']


def load_active_records(path):
    """Read one tab-delimited file and return its rows whose status is 'AA'.

    Args:
        path: Path of a tab-delimited text file with a header row that has
            at least the columns 'status', 'phone_number', 'postal_code'
            and 'state'.

    Returns:
        A new DataFrame holding only the OUTPUT_COLUMNS of the matching
        rows.  Missing 'state' values are replaced by 'Unknown' and the
        column is returned as strings.

    Raises:
        KeyError: If a required column is absent from the file.
        pandas.errors.ParserError: If pandas cannot parse the file.
    """
    # All three identifier-like columns are read as text so that leading
    # zeros (postal codes, phone numbers, numeric state codes) survive.
    df = pd.read_csv(
        path,
        sep='\t',
        dtype={'postal_code': str, 'phone_number': str, 'state': str},
        low_memory=False,
        on_bad_lines='skip',
        encoding='ISO-8859-1',
    )
    active = df.loc[df['status'].isin(['AA']), OUTPUT_COLUMNS].copy()
    # Fill BEFORE casting: astype(str) would turn NaN into the literal
    # string 'nan', after which fillna() has nothing left to replace.
    active['state'] = active['state'].fillna('Unknown').astype(str)
    return active


def write_state_files(records, out_dir):
    """Write one 'FILTERED-<state>.csv' file per distinct state value.

    Args:
        records: DataFrame with a 'state' column that contains no missing
            values (as produced by load_active_records).
        out_dir: Directory that receives the CSV files; created if needed.

    Returns:
        The list of file paths written, in order of first appearance of
        each state in *records*.
    """
    os.makedirs(out_dir, exist_ok=True)
    written = []
    # A single groupby pass replaces one full boolean-mask scan per state;
    # sort=False keeps the states in order of first appearance.
    for state, state_df in records.groupby('state', sort=False):
        # All files go into the same directory; the state is encoded in
        # the file name only.
        output_filename = os.path.join(out_dir, f'FILTERED-{state}.csv')
        state_df.to_csv(output_filename, index=False)
        print(f'Saved to {output_filename}')
        written.append(output_filename)
    return written


# Read all tab-delimited files in the directory
filenames = glob.glob(os.path.join(input_directory, "*.txt"))

# Check if any .txt files were found
if not filenames:
    print("No .txt files found in the directory.")
else:
    # Load every file; a file that fails is reported and skipped so that
    # one bad input does not abort the whole run.
    dfs = []
    for file in filenames:
        try:
            dfs.append(load_active_records(file))
        except pd.errors.ParserError as e:
            print(f"Error parsing {file}: {e}")
        except UnicodeDecodeError as e:
            print(f"Encoding error in {file}: {e}")
        except KeyError as e:
            print(f"Missing required column in {file}: {e}")
        except Exception as e:
            print(f"An unexpected error occurred with {file}: {e}")

    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)

        # Split the data by state and save to CSV files
        write_state_files(combined_df, output_directory)
    else:
        print("No valid dataframes were created from the files.")
