In [3]:
import os
import pandas as pd

# Define input and output paths
input_path = "/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW"
output_path = "/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/TRANSFORMED"

# Ensure output directory exists
os.makedirs(output_path, exist_ok=True)

# List CSV files efficiently
files = [os.path.join(input_path, f) for f in os.listdir(input_path) if f.endswith(".csv")]
print(f"Found {len(files)} CSV files: {files}")

# Define transformation function
def transform_csv(df, columns_to_remove, new_column_order):
    """Transform a DataFrame by removing columns and reordering."""
    df = df.drop(columns=[col for col in columns_to_remove if col in df.columns], errors='ignore')
    df = df[[col for col in new_column_order if col in df.columns]]
    return df

# Load and concatenate all CSV files
try:
    dfs = [pd.read_csv(file) for file in files]
    df_combined = pd.concat(dfs, ignore_index=True)
    print(f"Combined DataFrame shape: {df_combined.shape}")
except Exception as e:
    print(f"Error loading files: {e}")
    raise

# Define columns to remove and desired order
remove_columns = [
    'ID'
]
# "ID",Name,Address,Featured image,Bing Maps URL,Latitude,Longitude,Rating,Rating Info,Category,Open Hours,Website,Phone,Emails,Social Medias,Facebook,Instagram,Twitter

new_columns = ['Name', 'Address', 'Category', 'Phone', 'Rating', 'Website']

# Transform the combined DataFrame
# Concatenate address columns
# df_combined['Address'] = df_combined['W4Efsd 3'].astype(str) + ' ' + df_combined['W4Efsd 4'].astype(str)

# Select and reorder columns
df_transformed = transform_csv(
    df_combined,
    remove_columns,
    new_columns
)

# Handle missing values and reset index
df_cleaned = df_transformed.dropna().reset_index(drop=True)
print(f"Cleaned DataFrame shape: {df_cleaned.shape}")

# Save to CSV
output_file = os.path.join(output_path, "Tel_pooja_store_20.csv")
df_cleaned.to_csv(output_file, index=False)
print(f"Saved transformed file to: {output_file}")

Found 8 CSV files: ['/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/salons.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/ladies_emp.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/silk_sarees.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/schools.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/hotels(lodge).csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/malls.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/tex&handlooms.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/hospital.csv']
Combined DataFrame shape: (494, 18)
Cleaned DataFrame shape: (91, 6)
Saved transformed file to: /Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/TRANSFORMED/Tel_pooja_store_20.csv


In [4]:
import os
import pandas as pd

# Define input and output paths
input_path = "/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW"
output_path = "/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/TRANSFORMED"

# Ensure output directory exists
os.makedirs(output_path, exist_ok=True)

# List CSV files efficiently
files = [os.path.join(input_path, f) for f in os.listdir(input_path) if f.endswith(".csv")]
print(f"Found {len(files)} CSV files: {files}")

# Define transformation function
def transform_csv(df, columns_to_remove, new_column_order):
    """Transform a DataFrame by removing columns and reordering."""
    df = df.drop(columns=[col for col in columns_to_remove if col in df.columns], errors='ignore')
    df = df[[col for col in new_column_order if col in df.columns]]
    return df

# Define columns to remove and desired order
remove_columns = [
    'ID'
]
# "ID",Name,Address,Featured image,Bing Maps URL,Latitude,Longitude,Rating,Rating Info,Category,Open Hours,Website,Phone,Emails,Social Medias,Facebook,Instagram,Twitter

new_columns = ['Name', 'Address', 'Category', 'Phone', 'Rating', 'Website']

# Process each CSV file individually
for file in files:
    try:
        # Read the CSV
        df = pd.read_csv(file)
        print(f"Processing: {os.path.basename(file)} - Shape: {df.shape}")

        # Concatenate address columns
        if 'W4Efsd 3' in df.columns and 'W4Efsd 4' in df.columns:
            df['Address'] = df['W4Efsd 3'].astype(str) + ' ' + df['W4Efsd 4'].astype(str)
        else:
            print(f"Warning: Missing address columns in {os.path.basename(file)}. Using empty Address.")
            df['Address'] = ""

        # Apply transformations
        df_transformed = transform_csv(df, remove_columns, new_columns)

        # Handle missing values and reset index
        df_cleaned = df_transformed.dropna().reset_index(drop=True)
        print(f"Cleaned shape: {df_cleaned.shape}")

        # Define output file path
        output_file = os.path.join(output_path, f"transformed_{os.path.basename(file)}")

        # Save to CSV
        df_cleaned.to_csv(output_file, index=False)
        print(f"Saved: {output_file}")

    except Exception as e:
        print(f"Error processing {os.path.basename(file)}: {e}")

print("All files processed successfully!")

Found 8 CSV files: ['/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/salons.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/ladies_emp.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/silk_sarees.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/schools.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/hotels(lodge).csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/malls.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/tex&handlooms.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/hospital.csv']
Processing: salons.csv - Shape: (80, 18)
Cleaned shape: (11, 6)
Saved: /Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/TRANSFORMED/transformed_salons.csv
Processing: ladies_emp.csv - Shape: (20, 18)
Cleaned shape: (0, 6)
Saved: /Users/app

In [None]:
import os
import pandas as pd

# Define input and output paths
input_path = "/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW"
output_path = "/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/TRANSFORMED"

# Ensure output directory exists
os.makedirs(output_path, exist_ok=True)

# List CSV files efficiently
files = [os.path.join(input_path, f) for f in os.listdir(input_path) if f.endswith(".csv")]
print(f"Found {len(files)} CSV files: {files}")

# Define transformation function
def transform_csv(df, new_column_order):
    """Transform a DataFrame by selecting and reordering specified columns."""
    # Keep only columns that exist in the DataFrame from the desired order
    existing_columns = [col for col in new_column_order if col in df.columns]
    df = df[existing_columns]
    return df

# Desired columns
new_columns = ['Name', 'Address', 'Category', 'Phone', 'Rating', 'Website']

# Process each CSV file individually
for file in files:
    try:
        # Read the CSV
        df = pd.read_csv(file)
        print(f"Processing: {os.path.basename(file)} - Shape: {df.shape}")

        # Apply transformations (select and reorder columns)
        df_transformed = transform_csv(df, new_columns)

        # Handle missing values and reset index
        # df_cleaned = df_transformed.dropna().reset_index(drop=False)
        print(f"Cleaned shape: {df_cleaned.shape}")

        # Define output file path
        output_file = os.path.join(output_path, f"{os.path.basename(file)}_transformed")

        # Save to CSV
        df_cleaned.to_csv(output_file, index=False)
        print(f"Saved: {output_file}")

    except Exception as e:
        print(f"Error processing {os.path.basename(file)}: {e}")

print("All files processed successfully!")

Found 8 CSV files: ['/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/salons.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/ladies_emp.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/silk_sarees.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/schools.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/hotels(lodge).csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/malls.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/tex&handlooms.csv', '/Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/RAW/hospital.csv']
Processing: salons.csv - Shape: (80, 18)
Cleaned shape: (11, 7)
Saved: /Users/apple/Desktop/Python_workbook/data_scraping_project/TELUNGANA/TRANSFORMED/salons.csv_transformed
Processing: ladies_emp.csv - Shape: (20, 18)
Cleaned shape: (0, 7)
Saved: /Users/app