# Outlier removal based on the IQR method (2 iterations) & density plots

In [None]:
# 1 Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os 
import seaborn as sns

In [None]:
# 1.2 Locate the data and image folders

# Both folders are resolved relative to the current working directory:
# "data" holds the CSV files, "images" is meant for the generated plots.
data_dir, image_dir = (
    os.path.join(os.getcwd(), folder) for folder in ('data', 'images')
)

# Temperature dataset

In [None]:
# Two-pass IQR outlier removal for the temperature column 'Temp[°C]'.
#
# Pass 1 reads each file in `input_files`; pass 2 reads the "_cleaned_1" file
# that pass 1 wrote. Every pass writes "<input>_outliers.csv" and a
# "..._cleaned_<pass>.csv" file to data_dir, saves a scatter plot to image_dir,
# and prints the number of removed rows and the time spent in outlier periods.

# List of input file names
input_files = [
    '03_m7c_input.csv'
]

# savefig does not create missing folders, so make sure the target exists
os.makedirs(image_dir, exist_ok=True)

# Loop twice for outlier check and removal
for i in range(2):
    if i == 0:
        # First pass works on the original input files
        input_files_to_use = input_files
    else:
        # Second pass works on the files cleaned by the first pass
        input_files_to_use = [f[:-4] + f'_cleaned_{i}.csv' for f in input_files]

    # Loop through the input files
    for input_file in input_files_to_use:
        # Load the data (input_file already names the right file for this pass)
        df = pd.read_csv(f"{data_dir}/{input_file}")
        temperature = df['Temp[°C]']

        # Calculate the interquartile range (IQR)
        Q1 = temperature.quantile(0.25)
        Q3 = temperature.quantile(0.75)
        IQR = Q3 - Q1

        # Determine the upper and lower bounds (1.5 * IQR beyond the quartiles)
        lower_bound = Q1 - 1.5*IQR
        upper_bound = Q3 + 1.5*IQR

        # Row-wise outlier flag; reused below for the duration calculation
        is_outlier = (temperature < lower_bound) | (temperature > upper_bound)
        outliers = df[is_outlier]

        # Save the outliers to a new CSV file
        outliers.to_csv(f"{data_dir}/{input_file[:-4]}_outliers.csv", index=False)

        # Remove the outliers (rows with a missing value fail both tests and
        # therefore end up in neither the outlier nor the cleaned set)
        df_cleaned = df[(temperature >= lower_bound) & (temperature <= upper_bound)]

        # Print Number of Outliers Removed
        num_outliers_removed = len(outliers)
        print(f"{num_outliers_removed} outliers were removed from {input_file}")

        # Save the cleaned data to a new CSV file with the intended name.
        # In pass 2 the input name already ends in "_cleaned_1"; the replace()
        # strips that part so the result is "<base>_cleaned_2.csv".
        cleaned_file_number = i + 1
        cleaned_file_name = f"{input_file[:-4]}_cleaned_{cleaned_file_number}.csv"
        cleaned_file_name = cleaned_file_name.replace("_1_cleaned", "")
        df_cleaned.to_csv(f"{data_dir}/{cleaned_file_name}", index=False)

        # Select the style before the figure is created so it applies to it
        plt.style.use("tableau-colorblind10")
        fig, ax = plt.subplots(figsize=(16, 9))

        # Plot the data: all points first, then cleaned points and outliers on top
        ax.scatter(df['Time'], df['Temp[°C]'], color='black', s=20, alpha=1,)
        ax.scatter(df_cleaned['Time'], df_cleaned['Temp[°C]'], color='red', s=20, alpha=1, label='Cleaned Data')
        ax.scatter(outliers['Time'], outliers['Temp[°C]'], color='blue', s=20, alpha=1, label='Outliers')

        # Add a legend and labels
        ax.legend()
        ax.set_xlabel('Time', fontsize=18)
        ax.set_ylabel('Temperature [°C]', fontsize=18)
        ax.set_title('03-M7car Preprocessed and Cleaned Temperature Data', fontsize=22)

        # Modify x-axis ticks to display every 10th tick
        n = 10
        plt.xticks(np.arange(0, len(df["Time"]), n))
        plt.xticks(rotation=45, fontsize=12)

        # Modify y-axis ticks
        interval = 0.1
        plt.yticks(np.arange(df['Temp[°C]'].min(), df['Temp[°C]'].max() + interval, interval), fontsize=14)

        # Convert 'Time' column to datetime format (done after plotting so the
        # x-axis keeps the original time strings)
        df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')

        # Calculate the total duration of all outlier periods for the full dataset.
        # A period runs from its first outlier row to the next non-outlier row.
        if is_outlier.any():
            total_duration = 0
            start_outlier_time = None

            # Loop names deliberately differ from the pass counter `i`, which is
            # still needed for the plot file name below.
            for timestamp, flagged in zip(df['Time'], is_outlier):
                if flagged and start_outlier_time is None:
                    start_outlier_time = timestamp
                elif not flagged and start_outlier_time is not None:
                    total_duration += (timestamp - start_outlier_time).total_seconds()
                    start_outlier_time = None

            # A period still open at the end of the data is closed at the last row
            if start_outlier_time is not None:
                total_duration += (df['Time'].iloc[-1] - start_outlier_time).total_seconds()

            minutes = int(total_duration // 60)
            seconds = int(total_duration % 60)
            print(f"Total duration of all outlier periods: {minutes} minutes and {seconds} seconds")
        else:
            print("No outliers were found in the dataset.")

        # Save the plot in the "images" directory with the appropriate name
        plot_name = f"{input_file[:-4]}_iteration_{i+1}.jpg"
        plt.savefig(os.path.join(image_dir, plot_name))

        # Show the plot
        plt.show()

In [None]:
import pandas as pd
import seaborn as sns

# Density (KDE) plot of the temperature readings in the original input file.

# Load your data into a pandas DataFrame
df = pd.read_csv(f"{data_dir}/03_m7c_input.csv")

# Select the variable you want to plot
var_to_plot = 'Temp[°C]'

# Create a density plot using Seaborn.
# `fill=True` is the current spelling of the deprecated `shade=True`.
sns.kdeplot(df[var_to_plot], fill=True, color='red')

# Set the title and axis labels
plt.title('03-M7Car Density Plot of ' + 'Temperature' + ' Pre-Processed Data')
plt.xlabel('Temperature [°C]', fontsize=12)
plt.ylabel('Density', fontsize=12)

# Save the plot as a JPEG image (written to the current working directory)
plt.savefig('0_DensityPlot_-Temp.jpg', dpi=300, bbox_inches='tight')

# Display the plot
plt.show()

In [None]:
import pandas as pd
import seaborn as sns

# Density (KDE) plot of the temperature readings in the "_cleaned_1" file
# produced by the first outlier-removal pass.

# Load your data into a pandas DataFrame
df = pd.read_csv(f"{data_dir}/03_m7c_input_cleaned_1.csv")

# Select the variable you want to plot
var_to_plot = 'Temp[°C]'

# Create a density plot using Seaborn.
# `fill=True` is the current spelling of the deprecated `shade=True`.
sns.kdeplot(df[var_to_plot], fill=True, color='green')

# Set the title and axis labels
plt.title('03-M7Car KDE Plot of ' + 'Temperature' + ' 1st Iteration Clean Data')
plt.xlabel('Temperature [°C]', fontsize=12)
plt.ylabel('Density', fontsize=12)

# Save the plot as a JPEG image (written to the current working directory)
plt.savefig('1_DensityPlot_-Temp.jpg', dpi=300, bbox_inches='tight')

# Display the plot
plt.show()

In [None]:
import pandas as pd
import seaborn as sns

# Density (KDE) plot of the temperature readings in the "_cleaned_2" file
# produced by the second outlier-removal pass.

# Load your data into a pandas DataFrame
df = pd.read_csv(f"{data_dir}/03_m7c_input_cleaned_2.csv")

# Select the variable you want to plot
var_to_plot = 'Temp[°C]'

# Create a density plot using Seaborn.
# `fill=True` is the current spelling of the deprecated `shade=True`.
sns.kdeplot(df[var_to_plot], fill=True, color='blue')

# Set the title and axis labels (same label size as the other density plots)
plt.title('03-M7Car Density Plot of ' + 'Temperature' + ' 2nd Iteration Data')
plt.xlabel('Temperature [°C]', fontsize=12)
plt.ylabel('Density', fontsize=12)

# Save the plot as a JPEG image (written to the current working directory)
plt.savefig('2_DensityPlot_-Temp.jpg', dpi=300, bbox_inches='tight')

# Display the plot
plt.show()

In [None]:
# Two-pass IQR outlier removal for the temperature column 'Temp[°C]'
# (same processing as the earlier temperature cell, with 0.2 °C y-ticks).
#
# Pass 1 reads each file in `input_files`; pass 2 reads the "_cleaned_1" file
# that pass 1 wrote. Every pass writes "<input>_outliers.csv" and a
# "..._cleaned_<pass>.csv" file to data_dir, saves a scatter plot to image_dir,
# and prints the number of removed rows and the time spent in outlier periods.

# List of input file names
input_files = [
    '03_m7c_input.csv'
]

# savefig does not create missing folders, so make sure the target exists
os.makedirs(image_dir, exist_ok=True)

# Loop twice for outlier check and removal
for i in range(2):
    if i == 0:
        # First pass works on the original input files
        input_files_to_use = input_files
    else:
        # Second pass works on the files cleaned by the first pass
        input_files_to_use = [f[:-4] + f'_cleaned_{i}.csv' for f in input_files]

    # Loop through the input files
    for input_file in input_files_to_use:
        # Load the data (input_file already names the right file for this pass)
        df = pd.read_csv(f"{data_dir}/{input_file}")
        temperature = df['Temp[°C]']

        # Calculate the interquartile range (IQR)
        Q1 = temperature.quantile(0.25)
        Q3 = temperature.quantile(0.75)
        IQR = Q3 - Q1

        # Determine the upper and lower bounds (1.5 * IQR beyond the quartiles)
        lower_bound = Q1 - 1.5*IQR
        upper_bound = Q3 + 1.5*IQR

        # Row-wise outlier flag; reused below for the duration calculation
        is_outlier = (temperature < lower_bound) | (temperature > upper_bound)
        outliers = df[is_outlier]

        # Save the outliers to a new CSV file
        outliers.to_csv(f"{data_dir}/{input_file[:-4]}_outliers.csv", index=False)

        # Remove the outliers
        df_cleaned = df[(temperature >= lower_bound) & (temperature <= upper_bound)]

        # Print Number of Outliers Removed
        num_outliers_removed = len(outliers)
        print(f"{num_outliers_removed} outliers were removed from {input_file}")

        # Save the cleaned data to a new CSV file with the intended name.
        # In pass 2 the input name already ends in "_cleaned_1"; the replace()
        # strips that part so the result is "<base>_cleaned_2.csv".
        cleaned_file_number = i + 1
        cleaned_file_name = f"{input_file[:-4]}_cleaned_{cleaned_file_number}.csv"
        cleaned_file_name = cleaned_file_name.replace("_1_cleaned", "")
        df_cleaned.to_csv(f"{data_dir}/{cleaned_file_name}", index=False)

        # Select the style before the figure is created so it applies to it
        plt.style.use("tableau-colorblind10")
        fig, ax = plt.subplots(figsize=(16, 9))

        # Plot the data: all points first, then cleaned points and outliers on top
        ax.scatter(df['Time'], df['Temp[°C]'], color='black', s=20, alpha=1,)
        ax.scatter(df_cleaned['Time'], df_cleaned['Temp[°C]'], color='red', s=20, alpha=1, label='Cleaned Data')
        ax.scatter(outliers['Time'], outliers['Temp[°C]'], color='blue', s=20, alpha=1, label='Outliers')

        # Add a legend and labels (title names the 03-M7car data set that
        # '03_m7c_input.csv' belongs to, matching the other plots)
        ax.legend()
        ax.set_xlabel('Time', fontsize=18)
        ax.set_ylabel('Temperature [°C]', fontsize=18)
        ax.set_title('03-M7car Preprocessed and Cleaned Temperature Data', fontsize=22)

        # Modify x-axis ticks to display every 10th tick
        n = 10
        plt.xticks(np.arange(0, len(df["Time"]), n))
        plt.xticks(rotation=45, fontsize=12)

        # Modify y-axis ticks
        interval = 0.2
        plt.yticks(np.arange(df['Temp[°C]'].min(), df['Temp[°C]'].max() + interval, interval), fontsize=14)

        # Parse the time stamps into a separate series; df keeps its strings
        times = pd.to_datetime(df['Time'])

        # Calculate the total duration of all outlier periods.
        # A period runs from its first outlier row to the next non-outlier row,
        # so clean stretches between two separate periods are not counted.
        if is_outlier.any():
            total_duration = 0
            start_outlier_time = None

            # Loop names deliberately differ from the pass counter `i`, which is
            # still needed for the plot file name below.
            for timestamp, flagged in zip(times, is_outlier):
                if flagged and start_outlier_time is None:
                    start_outlier_time = timestamp
                elif not flagged and start_outlier_time is not None:
                    total_duration += (timestamp - start_outlier_time).total_seconds()
                    start_outlier_time = None

            # A period still open at the end of the data is closed at the last row
            if start_outlier_time is not None:
                total_duration += (times.iloc[-1] - start_outlier_time).total_seconds()

            minutes = int(total_duration // 60)
            seconds = int(total_duration % 60)
            print(f"Total duration of all outlier periods: {minutes} minutes and {seconds} seconds")
        else:
            print("No outliers were found in the dataset.")

        # Save the plot in the "images" directory with the appropriate name
        plot_name = f"{input_file[:-4]}_iteration_{i+1}.jpg"
        plt.savefig(os.path.join(image_dir, plot_name))

        # Show the plot
        plt.show()

In [None]:
# Temperature v/s Speed Analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

# Read the original input file
df = pd.read_csv(f"{data_dir}/03_m7c_input.csv")

# Look of the temperature series: red circles, size 8
temp_color, temp_marker, temp_markersize = 'r', 'o', 8

# Look of the speed series: blue squares, size 6
speed_color, speed_marker, speed_markersize = 'b', 's', 6

# One figure: temperature on the left y-axis, speed on a twin right y-axis
fig, ax1 = plt.subplots(figsize=(16, 9))
plt.style.use("tableau-colorblind10")

# Left axis: temperature against time, markers only (empty line style)
x = df['Time']
y1 = df['Temp[°C]']
ax1.plot(
    x,
    y1,
    temp_marker,
    color=temp_color,
    linestyle='',
    markersize=temp_markersize,
    label='Temperature [°C]',
)
ax1.set_xlabel('Time', fontsize=16)
ax1.set_ylabel('Temperature [°C]', fontsize=16)

# Major ticks every 0.2 on the temperature axis
ax1.yaxis.set_major_locator(MultipleLocator(0.2))
ax1.tick_params(axis='both', labelsize=14)

# Right axis sharing the x-axis with ax1
ax2 = ax1.twinx()

# Right axis: speed against time, semi-transparent squares with a dotted line
y2 = df['Speed[km/h]']
ax2.plot(
    x,
    y2,
    speed_marker,
    color=speed_color,
    linestyle=':',
    alpha=0.4,
    markersize=speed_markersize,
    label='Speed [km/h]',
)
ax2.set_ylabel('Speed [km/h]', fontsize=16)

# Major ticks every 5 on the speed axis
ax2.yaxis.set_major_locator(MultipleLocator(5))
ax2.tick_params(axis='both', labelsize=14)

# Keep every 12th x position as a tick, then rotate the labels on ax1
n = 12
tick_positions = np.arange(0, len(df["Time"]), n)
plt.xticks(tick_positions, fontsize=14)
plt.setp(ax1.get_xticklabels(), rotation=45)

# Title, then a single legend that combines the entries of both axes
plt.title('03-M7car Temperature & Speed Preprocessed Data', fontsize=24)
lines_1, labels_1 = ax1.get_legend_handles_labels()
lines_2, labels_2 = ax2.get_legend_handles_labels()
ax2.legend(
    lines_1 + lines_2,
    labels_1 + labels_2,
    loc='best',
    fontsize=14,
)

# Write the figure to a JPEG file in the current working directory
plt.savefig('0_TEMP-SPEED.jpg', dpi=300, bbox_inches='tight')

# Display the figure
plt.show()

In [None]:
# Temperature v/s Speed Analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

# Read the file written by the first outlier-removal pass
df = pd.read_csv(f"{data_dir}/03_m7c_input_cleaned_1.csv")

# Look of the temperature series: red circles, size 8
temp_color, temp_marker, temp_markersize = 'r', 'o', 8

# Look of the speed series: blue squares, size 6
speed_color, speed_marker, speed_markersize = 'b', 's', 6

# One figure: temperature on the left y-axis, speed on a twin right y-axis
fig, ax1 = plt.subplots(figsize=(16, 9))
plt.style.use("tableau-colorblind10")

# Left axis: temperature against time, markers only (empty line style)
x = df['Time']
y1 = df['Temp[°C]']
ax1.plot(
    x,
    y1,
    temp_marker,
    color=temp_color,
    linestyle='',
    markersize=temp_markersize,
    label='Temperature [°C]',
)
ax1.set_xlabel('Time', fontsize=16)
ax1.set_ylabel('Temperature [°C]', fontsize=16)

# Major ticks every 0.2 on the temperature axis
ax1.yaxis.set_major_locator(MultipleLocator(0.2))
ax1.tick_params(axis='both', labelsize=14)

# Right axis sharing the x-axis with ax1
ax2 = ax1.twinx()

# Right axis: speed against time, semi-transparent squares with a dotted line
y2 = df['Speed[km/h]']
ax2.plot(
    x,
    y2,
    speed_marker,
    color=speed_color,
    linestyle=':',
    alpha=0.4,
    markersize=speed_markersize,
    label='Speed [km/h]',
)
ax2.set_ylabel('Speed [km/h]', fontsize=16)

# Major ticks every 5 on the speed axis
ax2.yaxis.set_major_locator(MultipleLocator(5))
ax2.tick_params(axis='both', labelsize=14)

# Keep every 12th x position as a tick, then rotate the labels on ax1
n = 12
tick_positions = np.arange(0, len(df["Time"]), n)
plt.xticks(tick_positions, fontsize=14)
plt.setp(ax1.get_xticklabels(), rotation=45)

# Title, then a single legend that combines the entries of both axes
plt.title('03-M7car Temperature & Speed 1st Iteration Data', fontsize=24)
lines_1, labels_1 = ax1.get_legend_handles_labels()
lines_2, labels_2 = ax2.get_legend_handles_labels()
ax2.legend(
    lines_1 + lines_2,
    labels_1 + labels_2,
    loc='best',
    fontsize=14,
)

# Write the figure to a JPEG file in the current working directory
plt.savefig('1_TEMP-SPEED.jpg', dpi=300, bbox_inches='tight')

# Display the figure
plt.show()

# Humidity dataset

In [None]:
# Two-pass IQR outlier removal for the humidity column 'Hum[%]'.
#
# Pass 1 reads each file in `input_files`; pass 2 reads the "_cleaned_1" file
# that pass 1 wrote. Every pass writes "<input>_outliers.csv" and a
# "..._cleaned_<pass>.csv" file to data_dir, shows a scatter plot and prints
# the number of removed rows and the time spent in outlier periods.
# NOTE: the output file names are the same ones the temperature cells write,
# so running this cell overwrites those files.

# List of input file names
input_files = [
    '03_m7c_input.csv'
]


# Loop twice for outlier check and removal
for i in range(2):
    if i == 0:
        # First pass works on the original input files
        input_files_to_use = input_files
    else:
        # Second pass works on the files cleaned by the first pass
        input_files_to_use = [f[:-4] + f'_cleaned_{i}.csv' for f in input_files]

    # Loop through the input files
    for input_file in input_files_to_use:
        # Load the data (input_file already names the right file for this pass)
        df = pd.read_csv(f"{data_dir}/{input_file}")
        humidity = df['Hum[%]']

        # Calculate the interquartile range (IQR)
        Q1 = humidity.quantile(0.25)
        Q3 = humidity.quantile(0.75)
        IQR = Q3 - Q1

        # Determine the upper and lower bounds (1.5 * IQR beyond the quartiles)
        lower_bound = Q1 - 1.5*IQR
        upper_bound = Q3 + 1.5*IQR

        # Row-wise outlier flag; reused below for the duration calculation
        is_outlier = (humidity < lower_bound) | (humidity > upper_bound)
        outliers = df[is_outlier]

        # Save the outliers to a new CSV file
        outliers.to_csv(f"{data_dir}/{input_file[:-4]}_outliers.csv", index=False)

        # Remove the outliers
        df_cleaned = df[(humidity >= lower_bound) & (humidity <= upper_bound)]

        # Print Number of Outliers Removed
        num_outliers_removed = len(outliers)
        print(f"{num_outliers_removed} outliers were removed from {input_file}")

        # Save the cleaned data to a new CSV file with the intended name.
        # In pass 2 the input name already ends in "_cleaned_1"; the replace()
        # strips that part so the result is "<base>_cleaned_2.csv".
        cleaned_file_number = i + 1
        cleaned_file_name = f"{input_file[:-4]}_cleaned_{cleaned_file_number}.csv"
        cleaned_file_name = cleaned_file_name.replace("_1_cleaned", "")
        df_cleaned.to_csv(f"{data_dir}/{cleaned_file_name}", index=False)

        # Select the style before the figure is created so it applies to it
        plt.style.use("tableau-colorblind10")
        fig, ax = plt.subplots(figsize=(16, 9))

        # Plot the data: all points first, then cleaned points and outliers on top
        ax.scatter(df['Time'], df['Hum[%]'], color='black', s=20, alpha=1,)
        ax.scatter(df_cleaned['Time'], df_cleaned['Hum[%]'], color='green', s=20, alpha=1, label='Cleaned Data')
        ax.scatter(outliers['Time'], outliers['Hum[%]'], color='orange', s=20, alpha=1, label='Outliers')

        # Add a legend and labels (the title names the plotted quantity and the
        # 03-M7car data set that '03_m7c_input.csv' belongs to)
        ax.legend()
        ax.set_xlabel('Time', fontsize=18)
        ax.set_ylabel('Hum [%]', fontsize=18)
        ax.set_title('03-M7car Preprocessed and Cleaned Humidity Data', fontsize=22)

        # Modify x-axis ticks to display every 10th tick
        n = 10
        plt.xticks(np.arange(0, len(df["Time"]), n))
        plt.xticks(rotation=45, fontsize=12)

        # Modify y-axis ticks
        interval = 1
        plt.yticks(np.arange(df['Hum[%]'].min(), df['Hum[%]'].max() + interval, interval), fontsize=14)

        # Convert 'Time' column to datetime format (done after plotting so the
        # x-axis keeps the original time strings)
        df['Time'] = pd.to_datetime(df['Time'], format='%H:%M:%S')

        # Calculate the total duration of all outlier periods for the full dataset.
        # A period runs from its first outlier row to the next non-outlier row.
        if is_outlier.any():
            total_duration = 0
            start_outlier_time = None

            # Loop names deliberately differ from the pass counter `i`
            for timestamp, flagged in zip(df['Time'], is_outlier):
                if flagged and start_outlier_time is None:
                    start_outlier_time = timestamp
                elif not flagged and start_outlier_time is not None:
                    total_duration += (timestamp - start_outlier_time).total_seconds()
                    start_outlier_time = None

            # A period still open at the end of the data is closed at the last row
            if start_outlier_time is not None:
                total_duration += (df['Time'].iloc[-1] - start_outlier_time).total_seconds()

            minutes = int(total_duration // 60)
            seconds = int(total_duration % 60)
            print(f"Total duration of all outlier periods: {minutes} minutes and {seconds} seconds")
        else:
            print("No outliers were found in the dataset.")

        # Saving is disabled for this cell; the file name below is the one the
        # temperature cells use, so enabling it as-is would overwrite their plots.
        #plot_name = f"{input_file[:-4]}_iteration_{i+1}.jpg"
        #plt.savefig(os.path.join('images', plot_name))

        # Show the plot
        plt.show()

In [None]:
# Humidity v/s Speed Analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

# Read the original input file
df = pd.read_csv(f"{data_dir}/03_m7c_input.csv")

# Look of the humidity series: green circles, size 8
# (the variables keep their "temp_" names but style the humidity markers here)
temp_color, temp_marker, temp_markersize = 'g', 'o', 8

# Look of the speed series: blue squares, size 6
speed_color, speed_marker, speed_markersize = 'b', 's', 6

# One figure: humidity on the left y-axis, speed on a twin right y-axis
fig, ax1 = plt.subplots(figsize=(16, 9))
plt.style.use("tableau-colorblind10")

# Left axis: humidity against time, markers only (empty line style)
x = df['Time']
y1 = df['Hum[%]']
ax1.plot(
    x,
    y1,
    temp_marker,
    color=temp_color,
    linestyle='',
    markersize=temp_markersize,
    label='Humidity [%]',
)
ax1.set_xlabel('Time', fontsize=16)
ax1.set_ylabel('Humidity [%]', fontsize=16)

# Major ticks every 1 on the humidity axis
ax1.yaxis.set_major_locator(MultipleLocator(1))
ax1.tick_params(axis='both', labelsize=14)

# Right axis sharing the x-axis with ax1
ax2 = ax1.twinx()

# Right axis: speed against time, semi-transparent squares with a dotted line
y2 = df['Speed[km/h]']
ax2.plot(
    x,
    y2,
    speed_marker,
    color=speed_color,
    linestyle=':',
    alpha=0.4,
    markersize=speed_markersize,
    label='Speed [km/h]',
)
ax2.set_ylabel('Speed [km/h]', fontsize=16)

# Major ticks every 5 on the speed axis
ax2.yaxis.set_major_locator(MultipleLocator(5))
ax2.tick_params(axis='both', labelsize=14)

# Keep every 12th x position as a tick, then rotate the labels on ax1
n = 12
tick_positions = np.arange(0, len(df["Time"]), n)
plt.xticks(tick_positions, fontsize=14)
plt.setp(ax1.get_xticklabels(), rotation=45)

# Title, then a single legend that combines the entries of both axes
plt.title('03-M7car Humidity & Speed Preprocessed Data', fontsize=24)
lines_1, labels_1 = ax1.get_legend_handles_labels()
lines_2, labels_2 = ax2.get_legend_handles_labels()
ax2.legend(
    lines_1 + lines_2,
    labels_1 + labels_2,
    loc='upper left',
    fontsize=14,
)

# Write the figure to a JPEG file in the current working directory
plt.savefig('0_HUM-SPEED.jpg', dpi=300, bbox_inches='tight')

# Display the figure
plt.show()

In [None]:
# Humidity v/s Speed Analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

# Read the file written by the first outlier-removal pass
df = pd.read_csv(f"{data_dir}/03_m7c_input_cleaned_1.csv")

# Look of the humidity series: green circles, size 8
# (the variables keep their "temp_" names but style the humidity markers here)
temp_color, temp_marker, temp_markersize = 'g', 'o', 8

# Look of the speed series: blue squares, size 6
speed_color, speed_marker, speed_markersize = 'b', 's', 6

# One figure: humidity on the left y-axis, speed on a twin right y-axis
fig, ax1 = plt.subplots(figsize=(16, 9))
plt.style.use("tableau-colorblind10")

# Left axis: humidity against time, markers only (empty line style)
x = df['Time']
y1 = df['Hum[%]']
ax1.plot(
    x,
    y1,
    temp_marker,
    color=temp_color,
    linestyle='',
    markersize=temp_markersize,
    label='Humidity [%]',
)
ax1.set_xlabel('Time', fontsize=16)
ax1.set_ylabel('Humidity [%]', fontsize=16)

# Major ticks every 1 on the humidity axis
ax1.yaxis.set_major_locator(MultipleLocator(1))
ax1.tick_params(axis='both', labelsize=14)

# Right axis sharing the x-axis with ax1
ax2 = ax1.twinx()

# Right axis: speed against time, semi-transparent squares with a dotted line
y2 = df['Speed[km/h]']
ax2.plot(
    x,
    y2,
    speed_marker,
    color=speed_color,
    linestyle=':',
    alpha=0.4,
    markersize=speed_markersize,
    label='Speed [km/h]',
)
ax2.set_ylabel('Speed [km/h]', fontsize=16)

# Major ticks every 5 on the speed axis
ax2.yaxis.set_major_locator(MultipleLocator(5))
ax2.tick_params(axis='both', labelsize=14)

# Keep every 12th x position as a tick, then rotate the labels on ax1
n = 12
tick_positions = np.arange(0, len(df["Time"]), n)
plt.xticks(tick_positions, fontsize=14)
plt.setp(ax1.get_xticklabels(), rotation=45)

# Title, then a single legend that combines the entries of both axes
plt.title('03-M7car Humidity & Speed 1st Iteration Data', fontsize=24)
lines_1, labels_1 = ax1.get_legend_handles_labels()
lines_2, labels_2 = ax2.get_legend_handles_labels()
ax2.legend(
    lines_1 + lines_2,
    labels_1 + labels_2,
    loc='upper left',
    fontsize=14,
)

# Write the figure to a JPEG file in the current working directory
plt.savefig('1_HUM-SPEED.jpg', dpi=300, bbox_inches='tight')

# Display the figure
plt.show()