## CSV Data Processing and Combination Script

### Here we have multiple CSV files stored in a directory. We read each file, divide one column by another (scaled by 100 to give a percentage), and store the result in a new column named after that CSV file. Finally, the combined result is saved as a single CSV file (Combined_CSV_File).

### Taking Sample Data for this

### _Here we create 3 sample datasets and then simply save each of them as a CSV file_

In [1]:
import pandas as pd

# Column values for the three sample tables (same three column names in each)
data1 = dict(
    column1=[10, 20, 30, 40],
    column2=[15, 25, 35, 45],
    column3=[12, 22, 32, 42],
)
data2 = dict(
    column1=[50, 60, 70, 80],
    column2=[55, 65, 75, 85],
    column3=[52, 62, 72, 82],
)
data3 = dict(
    column1=[90, 100, 110, 120],
    column2=[95, 105, 115, 125],
    column3=[92, 102, 112, 122],
)

# Wrap each set of column values in its own DataFrame
df1, df2, df3 = (pd.DataFrame(columns) for columns in (data1, data2, data3))

# Write every DataFrame to CSV without the index column.
# NOTE: the path is a placeholder shared by all three writes; replace it with
# a distinct file path per DataFrame, otherwise each write overwrites the last.
for _frame in (df1, df2, df3):
    _frame.to_csv(r'Path_to_File', index=False)

### Printing Dataset

In [2]:
# Pair each heading with the DataFrame it introduces
labelled_frames = (
    ("First Dataframe is:\n", df1),
    ("Second Dataframe is:\n", df2),
    ("Third Dataframe is:\n", df3),
)

# Print heading then table; an empty line separates consecutive tables
for position, (heading, frame) in enumerate(labelled_frames):
    if position:
        print()
    print(heading)
    print(frame)

First Dataframe is:

   column1  column2  column3
0       10       15       12
1       20       25       22
2       30       35       32
3       40       45       42

Second Dataframe is:

   column1  column2  column3
0       50       55       52
1       60       65       62
2       70       75       72
3       80       85       82

Third Dataframe is:

   column1  column2  column3
0       90       95       92
1      100      105      102
2      110      115      112
3      120      125      122


### Reading Files from directory

In [3]:
import os

# Function to process each CSV file

def process_csv(file_path):
    """Load one CSV file and append a percentage column and a source-name column.

    Args:
        file_path: Path of the CSV file to read. The file must contain the
            columns ``column1`` and ``column2``.

    Returns:
        The loaded DataFrame with two extra columns: ``new_column``
        (``column1 / column2 * 100``) and ``file_name`` (the file's base name
        without its extension, repeated on every row).
    """
    table = pd.read_csv(file_path)

    # Base name of the file with the extension stripped
    stem = os.path.splitext(os.path.basename(file_path))[0]

    # Both derived columns are appended in this order: ratio first, then name
    return table.assign(
        new_column=table['column1'] / table['column2'] * 100,
        file_name=stem,
    )

# Names of the CSV files to load from the source directory
csv_files = ['File1.csv', 'File2.csv', 'File3.csv']  # Add CSV file names here

# One processed DataFrame per listed file, in list order
dfs = [
    process_csv(os.path.join(r'Path_to_Directory', file))  # Update the path accordingly
    for file in csv_files
]

# Stack the per-file tables into one, renumbering the rows from zero
combined_df = pd.concat(dfs, ignore_index=True)

# Write the stacked table to a new CSV file without the index column
combined_df.to_csv(r'Path_to_File', index=False)

In [4]:
import os
import pandas as pd

# Function to process each CSV file
def process_csv(file_path):
    """Compute the ``column1 / column2`` percentage of one CSV file.

    Args:
        file_path: Path of the CSV file to read. The file must contain the
            columns ``column1`` and ``column2``.

    Returns:
        A single-column DataFrame whose column is named after the file (base
        name without extension) and holds ``column1 / column2 * 100`` per row.
    """
    source = pd.read_csv(file_path)

    # The result column is labelled with the file's base name, extension removed
    column_label = os.path.splitext(os.path.basename(file_path))[0]

    percentage = source['column1'] / source['column2'] * 100
    return pd.DataFrame({column_label: percentage})

# Names of the CSV files to load from the source directory
csv_files = ['File1.csv', 'File2.csv', 'File3.csv']

# Process every file first and concatenate once afterwards: calling pd.concat
# inside the loop copies the whole accumulated table again on each iteration.
per_file_frames = [
    process_csv(os.path.join(r'Path_to_Directory', file))  # Update the path accordingly
    for file in csv_files
]

# Place the per-file columns side by side. pd.concat raises ValueError for an
# empty list, so an empty csv_files yields an empty DataFrame instead.
combined_df = pd.concat(per_file_frames, axis=1) if per_file_frames else pd.DataFrame()

# Save combined DataFrame to a new CSV file
combined_df.to_csv(r'Path_to_File', index=False)

### Printing Resultant Dataframe

In [5]:
# Print a heading for the combined result
print("Resultant Dataframe is:\n")

# Bare expression as the last statement of the cell; in a notebook this
# presumably renders combined_df as the cell output (it prints nothing when
# run as a plain script)
combined_df

Resultant Dataframe is:



Unnamed: 0,File1,File2,File3
0,66.666667,90.909091,94.736842
1,80.0,92.307692,95.238095
2,85.714286,93.333333,95.652174
3,88.888889,94.117647,96.0


### If we do not have a list of CSV files, we can read every file in the directory directly and apply the processing only to the files that are CSV files

### *_Here we are assuming all CSV files have columns named `column1` and `column2`_*

In [17]:
# Function to process each CSV file
def process_csv(file_path):
    """Return the per-row ``column1 / column2`` percentage for one CSV file.

    Args:
        file_path: Path of the CSV file to read. The file must contain the
            columns ``column1`` and ``column2``.

    Returns:
        A DataFrame with exactly one column, named after the file (base name
        without extension), containing ``column1 / column2 * 100``.
    """
    csv_data = pd.read_csv(file_path)

    # Drop the directory part, then the extension, to get the column name
    base_name = os.path.basename(file_path)
    file_name, _extension = os.path.splitext(base_name)

    numerator = csv_data['column1']
    denominator = csv_data['column2']
    return pd.DataFrame({file_name: numerator / denominator * 100})

# os is used for listing the directory and joining paths
import os

# Directory path
directory = 'Path_to_Directory'

# os.listdir returns entries in arbitrary order, so sort the CSV names to keep
# the column order of the combined table the same on every run.
csv_names = sorted(
    filename for filename in os.listdir(directory) if filename.endswith('.csv')
)

# Process every CSV first and concatenate once afterwards: calling pd.concat
# inside the loop copies the whole accumulated table again on each iteration.
per_file_frames = [
    process_csv(os.path.join(directory, filename)) for filename in csv_names
]

# Place the per-file columns side by side. pd.concat raises ValueError for an
# empty list, so a directory without CSV files yields an empty DataFrame.
combined_df = pd.concat(per_file_frames, axis=1) if per_file_frames else pd.DataFrame()

# Save combined DataFrame to a new CSV file
combined_df.to_csv(r'Path_to_File', index=False)

### Printing Resultant Dataframe

In [18]:
# Print a heading for the combined result
print("Resultant Dataframe is:\n")

# Bare expression as the last statement of the cell; in a notebook this
# presumably renders combined_df as the cell output (it prints nothing when
# run as a plain script)
combined_df

Resultant Dataframe is:



Unnamed: 0,File1,File2,File3
0,66.666667,90.909091,94.736842
1,80.0,92.307692,95.238095
2,85.714286,93.333333,95.652174
3,88.888889,94.117647,96.0


### In case we need to use only the first and second columns of each CSV file, whatever they are named

In [22]:
# Function to process each CSV file
def process_csv(file_path):
    """Compute the first-column / second-column percentage of one CSV file.

    The two columns are chosen by position, so their names do not matter.

    Args:
        file_path: Path of the CSV file to read. The file must have at least
            two columns.

    Returns:
        A single-column DataFrame whose column is named after the file (base
        name without extension) and holds ``first / second * 100`` per row.
    """
    table = pd.read_csv(file_path)

    # Result column name: the file's base name with the extension removed
    label = os.path.splitext(os.path.basename(file_path))[0]

    # Look the two operands up by position in the header
    header = table.columns
    first_values = table[header[0]]
    second_values = table[header[1]]

    return pd.DataFrame({label: first_values / second_values * 100})

# os is used for listing the directory and joining paths
import os

# Directory path
directory = 'Path_to_Directory'

# os.listdir returns entries in arbitrary order, so sort the CSV names to keep
# the column order of the combined table the same on every run.
csv_names = sorted(
    filename for filename in os.listdir(directory) if filename.endswith('.csv')
)

# Process every CSV first and concatenate once afterwards: calling pd.concat
# inside the loop copies the whole accumulated table again on each iteration.
per_file_frames = [
    process_csv(os.path.join(directory, filename)) for filename in csv_names
]

# Place the per-file columns side by side. pd.concat raises ValueError for an
# empty list, so a directory without CSV files yields an empty DataFrame.
combined_df = pd.concat(per_file_frames, axis=1) if per_file_frames else pd.DataFrame()

# Save combined DataFrame to a new CSV file
combined_df.to_csv(r'Path_to_File', index=False)

In [23]:
# Bare expression as the last statement of the cell; in a notebook this
# presumably renders combined_df as the cell output (it prints nothing when
# run as a plain script)
combined_df

Unnamed: 0,File1,File2,File3
0,66.666667,90.909091,94.736842
1,80.0,92.307692,95.238095
2,85.714286,93.333333,95.652174
3,88.888889,94.117647,96.0
