In [None]:
import pandas as pd
from scipy.stats import chi2_contingency

In [None]:
# Read the scenario 1 CSV into a DataFrame.
# NOTE(review): absolute path — looks like a Colab working directory; confirm
# and adjust when running elsewhere.
file_path = '/content/scenario1.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Define the mapping based on the label
def map_response(row):
    """Translate a first-person answer into a role-neutral category.

    "I went first" is rewritten according to the group marker found in the
    row's 'ID': IDs containing 'A' map it to "The car went first"; IDs
    containing 'B' (and no 'A') map it to "The pedestrian went first".
    Every other answer is returned unchanged.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide the keys 'ID' and 'AnswerText'.

    Returns
    -------
    The mapped category, or the original 'AnswerText' when no rule applies.
    This includes rows whose 'ID' is missing / not a string or contains
    neither 'A' nor 'B'; previously such rows fell off the end of the
    function and yielded None (or raised TypeError for a non-string ID).
    """
    answer = row['AnswerText']
    label = row['ID']

    # A missing ID (e.g. NaN) is not a string, so `'A' in label` would raise
    # TypeError; pass the answer through untouched instead.
    if not isinstance(label, str):
        return answer

    # Only the first-person answer depends on which group the respondent is in.
    if answer == "I went first":
        if 'A' in label:
            return "The car went first"
        if 'B' in label:
            return "The pedestrian went first"

    # Everything else is kept as-is.
    return answer

# Run map_response over every row and store the result in a new
# 'MappedCategory' column.
data['MappedCategory'] = data.apply(func=map_response, axis='columns')


In [None]:
# Filter data for question 117, group 'A' (driver).
# regex=False matches the literal text 'A-117'; na=False treats a missing ID
# as "no match" instead of producing an NA mask, which cannot be used for
# boolean indexing.
question_117_dataA = data[data['ID'].str.contains('A-117', regex=False, na=False)]
print(question_117_dataA)

# Filter data for question 117, group 'B' (pedestrian).
question_117_dataB = data[data['ID'].str.contains('B-117', regex=False, na=False)]
print(question_117_dataB)

         ID                 AnswerText             MappedCategory
32    A-117               I went first         The car went first
105   A-117               I went first         The car went first
154   A-117               I went first         The car went first
247   A-117               I went first         The car went first
282   A-117               I went first         The car went first
...     ...                        ...                        ...
4608  A-117             Does not apply             Does not apply
4655  A-117             Does not apply             Does not apply
4697  A-117  The pedestrian went first  The pedestrian went first
4817  A-117               I went first         The car went first
4904  A-117  The pedestrian went first  The pedestrian went first

[66 rows x 3 columns]
         ID          AnswerText             MappedCategory
33    B-117  The car went first         The car went first
121   B-117      Does not apply             Does not apply
163   B-

In [None]:
# Separate the data into 'A' (driver) and 'B' (pedestrian) responses.
# question_117_dataA / question_117_dataB are already restricted to IDs
# containing 'A-117' / 'B-117', so re-applying the same str.contains filter
# would select every row again; take independent copies instead.
driver_data = question_117_dataA.copy()
pedestrian_data = question_117_dataB.copy()
print(driver_data)
print(pedestrian_data)

         ID                 AnswerText             MappedCategory
32    A-117               I went first         The car went first
105   A-117               I went first         The car went first
154   A-117               I went first         The car went first
247   A-117               I went first         The car went first
282   A-117               I went first         The car went first
...     ...                        ...                        ...
4608  A-117             Does not apply             Does not apply
4655  A-117             Does not apply             Does not apply
4697  A-117  The pedestrian went first  The pedestrian went first
4817  A-117               I went first         The car went first
4904  A-117  The pedestrian went first  The pedestrian went first

[66 rows x 3 columns]
         ID          AnswerText             MappedCategory
33    B-117  The car went first         The car went first
121   B-117      Does not apply             Does not apply
163   B-

In [None]:
# Tally how often each mapped category occurs within each group
driver_counts = driver_data.loc[:, 'MappedCategory'].value_counts()
pedestrian_counts = pedestrian_data.loc[:, 'MappedCategory'].value_counts()
print(driver_counts, pedestrian_counts, sep='\n')


MappedCategory
The pedestrian went first    32
The car went first           20
Does not apply               13
I'm not sure                  1
Name: count, dtype: int64
MappedCategory
The pedestrian went first    32
The car went first           17
Does not apply               14
I'm not sure                  2
Name: count, dtype: int64


In [None]:
# Place both frequency tables side by side, one column per group; a category
# that is absent from one group's counts is filled with 0 instead of NaN.
contingency_table = pd.DataFrame(
    dict(Driver=driver_counts, Pedestrian=pedestrian_counts)
).fillna(value=0)
print(contingency_table)

                           Driver  Pedestrian
MappedCategory                               
The pedestrian went first      32          32
The car went first             20          17
Does not apply                 13          14
I'm not sure                    1           2


In [None]:
# Perform the chi-square test of independence on the Driver/Pedestrian
# contingency table. `expected` holds the expected frequency of every cell
# under the independence hypothesis.
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Output the results
print("Contingency Table:\n", contingency_table)
print("\nChi-square statistic:", chi2)
print("p-value:", p)
print("Degrees of freedom:", dof)
print("Expected frequencies:\n", expected)

# The chi-square approximation is commonly considered reliable only when the
# expected frequency of every cell is at least 5. Sparse categories violate
# that guideline, so flag it explicitly instead of reporting the p-value
# without a caveat.
low_expected_cells = int((expected < 5).sum())
if low_expected_cells:
    print(
        f"\nWarning: {low_expected_cells} of {expected.size} cells have an expected "
        "frequency below 5; the chi-square approximation may be unreliable "
        "(consider merging sparse categories or using an exact test)."
    )

Contingency Table:
                            Driver  Pedestrian
MappedCategory                               
The pedestrian went first      32          32
The car went first             20          17
Does not apply                 13          14
I'm not sure                    1           2

Chi-square statistic: 0.6060153393486726
p-value: 0.895053928682053
Degrees of freedom: 3
Expected frequencies:
 [[32.24427481 31.75572519]
 [18.64122137 18.35877863]
 [13.60305344 13.39694656]
 [ 1.51145038  1.48854962]]
