In [1]:
import numpy as np
import pandas as pd

In [15]:
# Read the CSV into `df`; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv("TATA Mar-25 CSI.csv")

In [16]:
# Summarise column dtypes and non-null counts to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 239 entries, 0 to 238
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   SA Professional Approach  239 non-null    int64  
 1   SA Friendliness           239 non-null    int64  
 2   SA Product Know.          238 non-null    float64
 3   On The Promised Time      239 non-null    int64  
 4   Completed First Time      239 non-null    int64  
 5   SA Explain The Work       239 non-null    int64  
 6   Cleaned Properly          239 non-null    int64  
 7   Overall Evaluation        239 non-null    int64  
 8   Satisfaction              239 non-null    object 
 9   Concern Y /N              239 non-null    object 
 10  NP Score                  239 non-null    object 
 11  ADVISOR NAME              239 non-null    object 
dtypes: float64(1), int64(7), object(4)
memory usage: 22.5+ KB


In [40]:
# Count the missing values in each column of the raw frame.
df.isnull().sum()

SA Professional Approach    0
SA Friendliness             0
SA Product Know.            1
On The Promised Time        0
Completed First Time        0
SA Explain The Work         0
Cleaned Properly            0
Overall Evaluation          0
Satisfaction                0
Concern Y /N                0
NP Score                    0
ADVISOR NAME                0
dtype: int64

In [41]:
# Drop every row that has a missing value in any column. The result is bound
# to a new name, so the raw `df` stays available for comparison.
df_cleaned = df.dropna()

In [42]:
# Re-run the per-column null count on the cleaned frame to confirm none remain.
df_cleaned.isnull().sum()

SA Professional Approach    0
SA Friendliness             0
SA Product Know.            0
On The Promised Time        0
Completed First Time        0
SA Explain The Work         0
Cleaned Properly            0
Overall Evaluation          0
Satisfaction                0
Concern Y /N                0
NP Score                    0
ADVISOR NAME                0
dtype: int64

In [43]:
# (rows, columns) of the cleaned frame, to see how many rows dropna() removed.
df_cleaned.shape

(238, 12)

# Correlation Matrix (Spearman) for Entire Dataset

In [44]:
import plotly.express as px

def plot_interactive_corr_with_count_all(df, numeric_columns):
    """
    Shows an interactive Spearman correlogram for the entire dataset using Plotly,
    with the number of rows in ``df`` displayed above the plot.

    Parameters:
    df (pd.DataFrame): The dataset to process.
    numeric_columns (list-like): Names of the numeric columns to calculate correlations for.

    Returns:
    None. The figure is rendered with ``fig.show()``; it is deliberately not
    returned so that a notebook cell ending in this call does not draw it twice.
    """
    # Materialise the selection once so any iterable of column names
    # (list, tuple, pandas Index, ...) is treated as a list of columns.
    columns = list(numeric_columns)

    # Pairwise Spearman rank correlation between the selected columns.
    corr_matrix = df[columns].corr(method='spearman')

    # Pin the colour range to the full correlation range [-1, 1]. Without
    # zmin/zmax the diverging RdBu scale is stretched over the observed
    # min/max, so its neutral midpoint no longer means "no correlation" and
    # colours are not comparable between plots.
    fig = px.imshow(
        corr_matrix,
        text_auto='.2f',  # two decimals keep the cell labels readable
        color_continuous_scale='RdBu',
        zmin=-1,
        zmax=1,
        title="Spearman Correlation Matrix - Entire Dataset",
        aspect='auto',
    )

    # The count is the number of rows in `df`; corr() itself ignores missing
    # values pairwise, so individual cells may be based on fewer observations.
    data_points = len(df)
    fig.update_layout(
        annotations=[
            dict(
                x=0.5, y=1.1,  # centred, just above the plotting area
                xref='paper', yref='paper',  # paper coordinates, not data coordinates
                text=f"Data Points: {data_points}",
                showarrow=False,
                font=dict(size=14, color="black"),
                align="center"
            )
        ]
    )

    # Show the plot
    fig.show()

# Draw the correlogram for df_cleaned, restricted to its numeric-dtype columns.
plot_interactive_corr_with_count_all(df_cleaned, df_cleaned.select_dtypes(include='number').columns)


# Correlation Matrix (Spearman) Grouped by 'Service Advisor'

In [45]:
import plotly.express as px

def plot_interactive_corr_with_count(df, group_column, numeric_columns, min_points=2):
    """
    Shows one interactive Spearman correlogram per group using Plotly, each with
    the number of rows in that group displayed above the plot.

    Parameters:
    df (pd.DataFrame): The dataset to process.
    group_column (str): The column to group by (e.g., 'ADVISOR NAME').
    numeric_columns (list-like): Names of the numeric columns to calculate correlations for.
    min_points (int): Smallest group size that is still plotted. Groups with
        fewer rows are skipped with a printed notice, because a correlation
        over a single observation is undefined and would only produce an
        empty heatmap. Pass 1 to plot every group regardless of size.

    Returns:
    None. Each figure is rendered with ``fig.show()``.
    """
    # Materialise the selection once so any iterable of column names
    # (list, tuple, pandas Index, ...) can be reused for every group.
    columns = list(numeric_columns)

    for name, group in df.groupby(group_column):
        data_points = len(group)  # number of rows in this group

        if data_points < min_points:
            # Say so explicitly, so the group is not dropped silently.
            print(
                f"Skipping {name}: only {data_points} data point(s); "
                f"at least {min_points} needed for a correlation matrix."
            )
            continue

        # Pairwise Spearman rank correlation within the group.
        corr_matrix = group[columns].corr(method='spearman')

        # Pin the colour range to [-1, 1] so the neutral midpoint of the
        # diverging RdBu scale always means "no correlation" and the plots
        # of different groups are directly comparable.
        fig = px.imshow(
            corr_matrix,
            text_auto='.2f',  # two decimals keep the cell labels readable
            color_continuous_scale='RdBu',
            zmin=-1,
            zmax=1,
            title=f"Spearman Correlation Matrix - {name}",
            aspect='auto',
        )

        # Add the group's row count as an annotation above the plot.
        fig.update_layout(
            annotations=[
                dict(
                    x=0.5, y=1.1,  # centred, just above the plotting area
                    xref='paper', yref='paper',  # paper coordinates, not data coordinates
                    text=f"Data Points: {data_points}",
                    showarrow=False,
                    font=dict(size=14, color="black"),
                    align="center"
                )
            ]
        )

        # Show the plot
        fig.show()

# Draw one correlogram per 'ADVISOR NAME' value, using the numeric-dtype columns of df_cleaned.
plot_interactive_corr_with_count(df_cleaned, 'ADVISOR NAME', df_cleaned.select_dtypes(include='number').columns)
