In [48]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Side-by-side "cleaned vs. scaled" line plots for every pair of Pune workbooks.
#
# For each pair of .xlsx files (paired by sorted position) one PNG is written to
# `output_dir`. The PNG contains one row per entry of `metric_names`: the left
# panel shows the cleaned column, the right panel the scaled column.

# Define folder paths
cleaned_pune_folder = r"C:\Users\karan\Downloads\Mansi Thesis\Check\Cleaned Pune"  # Replace with the actual path
scaled_pune_folder = r"C:\Users\karan\Downloads\Mansi Thesis\Check\Scaled Pune"   # Replace with the actual path

# Get the list of .xlsx files in each folder; sorting makes the i-th cleaned
# file line up with the i-th scaled file.
cleaned_pune_files = sorted(f for f in os.listdir(cleaned_pune_folder) if f.endswith('.xlsx'))
scaled_pune_files = sorted(f for f in os.listdir(scaled_pune_folder) if f.endswith('.xlsx'))

# Ensure the number of files matches (zip() below would otherwise silently
# drop the unpaired files).
if len(cleaned_pune_files) != len(scaled_pune_files):
    raise ValueError("Mismatch in the number of files between Cleaned Pune and Scaled Pune folders.")

# Specify the output directory
output_dir = r"C:\Users\karan\Downloads\Mansi Thesis\Check\Pune_Results"
os.makedirs(output_dir, exist_ok=True)

# Define the list of metric names. Columns are addressed by POSITION below, so
# this assumes the first len(metric_names) columns of every workbook appear in
# exactly this order -- TODO confirm against the workbooks.
metric_names = [
    "Impressions", "Reach", "Likes", "Love Reactions", "Care Reactions", "Haha Reactions",
    "Wow Reactions", "Sad Reactions", "Angry Reactions", "Comment Count", "Comment Replies",
    "Share Count", "Shared Reach", "Link Clicks", "Photo/Video Clicks", "Profile Clicks",
    "Video Views", "Hide Post", "Report as Spam", "Unfollow Page", "CTR (%)", "Conversions",
    "Conversion Rate (%)", "CPC(Rs)", "CPA(Rs)"
]
num_metrics = len(metric_names)  # loop-invariant, so computed once

# Loop through each pair of files and plot comparisons
for i, (cleaned_file, scaled_file) in enumerate(zip(cleaned_pune_files, scaled_pune_files)):
    # The last two underscore-separated tokens of both names (e.g. the
    # "_Pre"/"_Post" suffix) must be identical, otherwise the pairing is wrong.
    if cleaned_file.split('_')[-2:] != scaled_file.split('_')[-2:]:
        raise ValueError(f"File mismatch: {cleaned_file} and {scaled_file} do not correspond.")

    # Read the data from each file
    cleaned_data_path = os.path.join(cleaned_pune_folder, cleaned_file)
    scaled_data_path = os.path.join(scaled_pune_folder, scaled_file)

    cleaned_data = pd.read_excel(cleaned_data_path)
    scaled_data = pd.read_excel(scaled_data_path)

    # Ensure both datasets have the same number of columns
    if cleaned_data.shape[1] != scaled_data.shape[1]:
        raise ValueError(f"Mismatch in the number of metrics (columns) between {cleaned_file} and {scaled_file}.")

    # Both frames have the same width here, so one check covers both. Without
    # it, a narrow workbook fails with an opaque IndexError from .iloc halfway
    # through a figure.
    if cleaned_data.shape[1] < num_metrics:
        raise ValueError(
            f"{cleaned_file} and {scaled_file} have {cleaned_data.shape[1]} columns, "
            f"but {num_metrics} metrics are expected."
        )

    # One row per metric, two panels per row (left: cleaned, right: scaled).
    # squeeze=False keeps `axes` two-dimensional for any number of metrics.
    fig, axes = plt.subplots(num_metrics, 2, figsize=(12, num_metrics * 6), squeeze=False)
    try:
        for j, metric in enumerate(metric_names):
            ax_cleaned, ax_scaled = axes[j]

            # Plot Cleaned Data (left panel)
            ax_cleaned.plot(range(len(cleaned_data)), cleaned_data.iloc[:, j], marker='o',
                            label=f'Cleaned Data - {metric}', linestyle='-', alpha=0.8)
            ax_cleaned.set_title(f"Cleaned Data - {metric}")
            ax_cleaned.set_xlabel("Index")
            ax_cleaned.set_ylabel(metric)
            ax_cleaned.legend()
            ax_cleaned.grid(True)

            # Plot Scaled Data (right panel)
            ax_scaled.plot(range(len(scaled_data)), scaled_data.iloc[:, j], marker='x',
                           label=f'Scaled Data - {metric}', linestyle='--', alpha=0.8, color='orange')
            ax_scaled.set_title(f"Scaled Data - {metric}")
            ax_scaled.set_xlabel("Index")
            ax_scaled.set_ylabel(metric)
            ax_scaled.legend()
            ax_scaled.grid(True)

        # Adjust spacing between subplots
        fig.tight_layout()

        # splitext strips only the final extension, so file names that contain
        # additional dots are not truncated in the output name.
        output_path = os.path.join(output_dir, f"comparison_{i + 1}_{os.path.splitext(cleaned_file)[0]}.png")
        fig.savefig(output_path)
    finally:
        # Always release the (very tall) figure, even if plotting or saving
        # failed, so figures do not pile up in the kernel.
        plt.close(fig)

print(f"Plots have been saved to the '{output_dir}' directory.")


Plots have been saved to the 'C:\Users\karan\Downloads\Mansi Thesis\Check\Pune_Results' directory.


In [50]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Side-by-side "cleaned vs. scaled" line plots for every pair of workbooks in
# `cleaned_folder` / `scaled_folder`.
#
# For each pair of .xlsx files (paired by sorted position) one PNG is written to
# `output_dir`. The PNG contains one row per entry of `metric_names`: the left
# panel shows the cleaned column, the right panel the scaled column.

# Define folder paths
cleaned_folder = r"C:\Users\karan\Downloads\Mansi Thesis\Check\cleaned_folder"  # Replace with the actual path to the cleaned folder
scaled_folder = r"C:\Users\karan\Downloads\Mansi Thesis\Check\scaled_folder"   # Replace with the actual path to the scaled folder

# Get the list of .xlsx files in each folder; sorting makes the i-th cleaned
# file line up with the i-th scaled file.
# The folders defined in THIS cell are used here. Previously the cell read
# `cleaned_pune_folder` / `scaled_pune_folder`, which only exist if an earlier
# cell was run, and which made this cell re-plot the Pune data.
# The list names are kept unchanged in case later cells read them.
cleaned_pune_files = sorted(f for f in os.listdir(cleaned_folder) if f.endswith('.xlsx'))
scaled_pune_files = sorted(f for f in os.listdir(scaled_folder) if f.endswith('.xlsx'))

# Ensure the number of files matches (zip() below would otherwise silently
# drop the unpaired files).
if len(cleaned_pune_files) != len(scaled_pune_files):
    raise ValueError("Mismatch in the number of files between the cleaned and scaled folders.")

# Specify the output directory
output_dir = r"C:\Users\karan\Downloads\Mansi Thesis\Check\Results"
os.makedirs(output_dir, exist_ok=True)

# Define the list of metric names. Columns are addressed by POSITION below, so
# this assumes the first len(metric_names) columns of every workbook appear in
# exactly this order -- TODO confirm against the workbooks.
metric_names = [
    "Impressions", "Reach", "Likes", "Love Reactions", "Care Reactions", "Haha Reactions",
    "Wow Reactions", "Sad Reactions", "Angry Reactions", "Comment Count", "Comment Replies",
    "Share Count", "Shared Reach", "Link Clicks", "Photo/Video Clicks", "Profile Clicks",
    "Video Views", "Hide Post", "Report as Spam", "Unfollow Page", "CTR (%)", "Conversions",
    "Conversion Rate (%)", "CPC(Rs)", "CPA(Rs)"
]
num_metrics = len(metric_names)  # loop-invariant, so computed once

# Loop through each pair of files and plot comparisons
for i, (cleaned_file, scaled_file) in enumerate(zip(cleaned_pune_files, scaled_pune_files)):
    # The last two underscore-separated tokens of both names (e.g. the
    # "_Pre"/"_Post" suffix) must be identical, otherwise the pairing is wrong.
    if cleaned_file.split('_')[-2:] != scaled_file.split('_')[-2:]:
        raise ValueError(f"File mismatch: {cleaned_file} and {scaled_file} do not correspond.")

    # Read the data from each file (from this cell's folders, see above)
    cleaned_data_path = os.path.join(cleaned_folder, cleaned_file)
    scaled_data_path = os.path.join(scaled_folder, scaled_file)

    cleaned_data = pd.read_excel(cleaned_data_path)
    scaled_data = pd.read_excel(scaled_data_path)

    # Ensure both datasets have the same number of columns
    if cleaned_data.shape[1] != scaled_data.shape[1]:
        raise ValueError(f"Mismatch in the number of metrics (columns) between {cleaned_file} and {scaled_file}.")

    # Both frames have the same width here, so one check covers both. Without
    # it, a narrow workbook fails with an opaque IndexError from .iloc halfway
    # through a figure.
    if cleaned_data.shape[1] < num_metrics:
        raise ValueError(
            f"{cleaned_file} and {scaled_file} have {cleaned_data.shape[1]} columns, "
            f"but {num_metrics} metrics are expected."
        )

    # One row per metric, two panels per row (left: cleaned, right: scaled).
    # squeeze=False keeps `axes` two-dimensional for any number of metrics.
    fig, axes = plt.subplots(num_metrics, 2, figsize=(12, num_metrics * 6), squeeze=False)
    try:
        for j, metric in enumerate(metric_names):
            ax_cleaned, ax_scaled = axes[j]

            # Plot Cleaned Data (left panel)
            ax_cleaned.plot(range(len(cleaned_data)), cleaned_data.iloc[:, j], marker='o',
                            label=f'Cleaned Data - {metric}', linestyle='-', alpha=0.8)
            ax_cleaned.set_title(f"Cleaned Data - {metric}")
            ax_cleaned.set_xlabel("Index")
            ax_cleaned.set_ylabel(metric)
            ax_cleaned.legend()
            ax_cleaned.grid(True)

            # Plot Scaled Data (right panel)
            ax_scaled.plot(range(len(scaled_data)), scaled_data.iloc[:, j], marker='x',
                           label=f'Scaled Data - {metric}', linestyle='--', alpha=0.8, color='orange')
            ax_scaled.set_title(f"Scaled Data - {metric}")
            ax_scaled.set_xlabel("Index")
            ax_scaled.set_ylabel(metric)
            ax_scaled.legend()
            ax_scaled.grid(True)

        # Adjust spacing between subplots
        fig.tight_layout()

        # splitext strips only the final extension, so file names that contain
        # additional dots are not truncated in the output name.
        output_path = os.path.join(output_dir, f"comparison_{i + 1}_{os.path.splitext(cleaned_file)[0]}.png")
        fig.savefig(output_path)
    finally:
        # Always release the (very tall) figure, even if plotting or saving
        # failed, so figures do not pile up in the kernel.
        plt.close(fig)

print(f"Plots have been saved to the '{output_dir}' directory.")


Plots have been saved to the 'C:\Users\karan\Downloads\Mansi Thesis\Check\Results' directory.
