In [None]:
import pandas as pd
from scipy.stats import chi2_contingency

In [None]:
# Read the scenario CSV into a DataFrame; every later cell works from `data`.
file_path = '/content/scenario#3.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Define the mapping based on the label
def map_response(row):
    """Translate a perspective-relative answer into a perspective-neutral label.

    Rows whose ``ID`` contains ``'A'`` map ``"Me"`` to ``"The car"``. Rows whose
    ``ID`` contains ``'B'`` (checked only when ``'A'`` is absent) map
    ``"The driver"`` to ``"The car"`` and ``"Me"`` to ``"The pedestrian"``.
    Every other answer is returned unchanged.

    Args:
        row: Mapping-like record (e.g. a DataFrame row passed by
            ``DataFrame.apply(..., axis=1)``) exposing ``'ID'`` and
            ``'AnswerText'``.

    Returns:
        The mapped label, or the original ``AnswerText`` when no rule applies.
        This includes rows whose ``ID`` contains neither ``'A'`` nor ``'B'``;
        those are passed through instead of implicitly yielding ``None``.
    """
    answer = row['AnswerText']
    respondent_id = row['ID']

    if 'A' in respondent_id:
        if answer == "Me":
            return "The car"
    elif 'B' in respondent_id:
        if answer == "The driver":
            return "The car"
        if answer == "Me":
            return "The pedestrian"

    # No rule matched: keep the answer exactly as given.
    return answer

# Run map_response over every row (axis=1) and store the result in a new
# 'MappedCategory' column next to the raw 'AnswerText'.
data['MappedCategory'] = data.apply(func=map_response, axis=1)

In [None]:
# Filter data for question 112 of the 'A' questionnaire.
# regex=False matches the ID text literally; na=False treats a missing ID as
# "no match" instead of producing a NaN mask that cannot be used for indexing.
question_112_dataA = data[data['ID'].str.contains('A-112', regex=False, na=False)]
print(question_112_dataA)

# Filter data for question 112 of the 'B' questionnaire (same matching rules).
question_112_dataB = data[data['ID'].str.contains('B-112', regex=False, na=False)]
print(question_112_dataB)

         ID      AnswerText  MappedCategory
19    A-112      Both of us      Both of us
54    A-112              Me         The car
105   A-112              Me         The car
143   A-112              Me         The car
186   A-112              Me         The car
231   A-112              Me         The car
276   A-112  The pedestrian  The pedestrian
319   A-112      Both of us      Both of us
361   A-112      Both of us      Both of us
404   A-112      Both of us      Both of us
451   A-112      Both of us      Both of us
505   A-112      Both of us      Both of us
549   A-112              Me         The car
590   A-112      Both of us      Both of us
675   A-112      Both of us      Both of us
759   A-112  The pedestrian  The pedestrian
848   A-112  The pedestrian  The pedestrian
934   A-112  The pedestrian  The pedestrian
970   A-112      Both of us      Both of us
1012  A-112      Both of us      Both of us
1094  A-112  The pedestrian  The pedestrian
1225  A-112      Both of us     

In [None]:
# Separate the data into 'A' (driver) and 'B' (pedestrian) responses.
# question_112_dataA / question_112_dataB were each built by selecting the
# rows whose ID contains 'A-112' / 'B-112', so filtering them again on the same
# pattern selects every row. Take an independent copy instead of re-filtering.
driver_data = question_112_dataA.copy()
pedestrian_data = question_112_dataB.copy()
print(driver_data)
print(pedestrian_data)

         ID      AnswerText  MappedCategory
19    A-112      Both of us      Both of us
54    A-112              Me         The car
105   A-112              Me         The car
143   A-112              Me         The car
186   A-112              Me         The car
231   A-112              Me         The car
276   A-112  The pedestrian  The pedestrian
319   A-112      Both of us      Both of us
361   A-112      Both of us      Both of us
404   A-112      Both of us      Both of us
451   A-112      Both of us      Both of us
505   A-112      Both of us      Both of us
549   A-112              Me         The car
590   A-112      Both of us      Both of us
675   A-112      Both of us      Both of us
759   A-112  The pedestrian  The pedestrian
848   A-112  The pedestrian  The pedestrian
934   A-112  The pedestrian  The pedestrian
970   A-112      Both of us      Both of us
1012  A-112      Both of us      Both of us
1094  A-112  The pedestrian  The pedestrian
1225  A-112      Both of us     

In [None]:
# Tally how often each mapped category occurs in the driver and pedestrian
# subsets (value_counts orders the result by descending frequency).
driver_counts, pedestrian_counts = (
    subset['MappedCategory'].value_counts()
    for subset in (driver_data, pedestrian_data)
)
print(driver_counts, pedestrian_counts, sep='\n')


MappedCategory
Both of us        19
The car            9
The pedestrian     9
Name: count, dtype: int64
MappedCategory
Both of us        23
The pedestrian     8
The car            4
I'm not sure       2
Neither of us      1
Name: count, dtype: int64


In [None]:
# Combine the two frequency tables into a single DataFrame, one column per
# group, with rows aligned on the category label.
# A category that appears in only one group comes out as NaN, which also turns
# that column into floats; fill those gaps with 0 and cast back to int so the
# table holds whole-number counts in both columns.
contingency_table = (
    pd.DataFrame({'Driver': driver_counts, 'Pedestrian': pedestrian_counts})
    .fillna(0)
    .astype(int)
)
print(contingency_table)

                Driver  Pedestrian
MappedCategory                    
Both of us        19.0          23
I'm not sure       0.0           2
Neither of us      0.0           1
The car            9.0           4
The pedestrian     9.0           8


In [None]:
# Perform the chi-square test of independence on the observed counts.
# chi2_contingency returns the statistic, the p-value, the degrees of freedom
# and the array of expected frequencies under independence.
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Output the results. Multi-line objects are printed on their own line: passing
# them as a second print() argument inserts a separator space after the "\n",
# which shifts the first row of the table/array out of alignment.
print("Contingency Table:")
print(contingency_table)
print("\nChi-square statistic:", chi2)
print("p-value:", p)
print("Degrees of freedom:", dof)
print("Expected frequencies:")
print(expected)

# The chi-square approximation is commonly considered reliable only when the
# expected count in each cell is at least 5, so flag tables that fall short.
if (expected < 5).any():
    print(
        f"\nWarning: {(expected < 5).sum()} of {expected.size} expected "
        "frequencies are below 5; the chi-square approximation may be unreliable."
    )

Contingency Table:
                 Driver  Pedestrian
MappedCategory                    
Both of us        19.0          23
I'm not sure       0.0           2
Neither of us      0.0           1
The car            9.0           4
The pedestrian     9.0           8

Chi-square statistic: 5.35047069489794
p-value: 0.25318921694792323
Degrees of freedom: 4
Expected frequencies:
 [[20.72       21.28      ]
 [ 0.98666667  1.01333333]
 [ 0.49333333  0.50666667]
 [ 6.41333333  6.58666667]
 [ 8.38666667  8.61333333]]
