In [1]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

In [2]:
test = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')
gender = pd.read_csv('gender_submission.csv')

In [3]:
def surviving_score(train):
    # Define influencing parameters
    influence_params = ['Sex', 'SibSp', 'Parch', 'Fare Group', 'Age Group','Pclass']
    
    train['Age'] =  train['Age'].fillna(value = 101 )
    
    # Create age groups for 'Age'
    train['Age Group'] = pd.cut(train['Age'], bins=list(range(0, 103)), labels=False, right=False)
    
    # Create fare groups for 'Fare'
    train['Fare Group'] = pd.cut(train['Fare'], bins=list(range(0, 5000, 2)), labels=False, right=False)

    # Dictionary to store survival rates for each category in each parameter
    survival_rates = {}

    for param in influence_params:
        survival_rates[param] = train.groupby(param)['Survived'].mean()

    survival_rates_df = pd.DataFrame([(key, var, val) for key, series in survival_rates.items() for var, val in series.items()], columns=['Parameter', 'Category', 'Survival Rate'])

    return survival_rates_df



In [4]:
def final_calcul(train, survival_rates_df):
    influence_params = ['Sex', 'SibSp', 'Parch', 'Fare Group', 'Age Group','Pclass']
    
    # Create age and fare groups if not already present
    train['Age'] =  train['Age'].fillna(value = 101 )
    train['Age Group'] = pd.cut(train['Age'], bins=list(range(0, 103)), labels=False, right=False)
    train['Fare Group'] = pd.cut(train['Fare'], bins=list(range(0, 5000, 10)), labels=False, right=False)

    # Calculate composite survival score for each passenger
    for index, row in train.iterrows():
        scores = []
        for param in influence_params:
            # Find the survival rate for the passenger's category in each parameter
            category = row[param]
            survival_rate = survival_rates_df[(survival_rates_df['Parameter'] == param) & (survival_rates_df['Category'] == category)]['Survival Rate']
            
            if not survival_rate.empty:
                scores.append(survival_rate.iloc[0])
        
        # Calculate the average score
        avg_score = sum(scores) / len(scores) if scores else 0
        train.at[index, 'Survival Score'] = avg_score

    # Determine survival prediction (you need to decide a threshold)
    threshold = 0.40  # Example threshold
    train['Survived prediction'] = train['Survival Score'].apply(lambda x: 1 if x >= threshold else 0)
    
    return train

In [5]:
surviving_score = surviving_score(train)
surviving_score

Unnamed: 0,Parameter,Category,Survival Rate
0,Sex,female,0.742038
1,Sex,male,0.188908
2,SibSp,0,0.345395
3,SibSp,1,0.535885
4,SibSp,2,0.464286
...,...,...,...
147,Age Group,80,1.000000
148,Age Group,101,0.293785
149,Pclass,1,0.629630
150,Pclass,2,0.472826


In [6]:
final_calcul = final_calcul(train, surviving_score)

In [7]:
final_calcul

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Age Group,Fare Group,Survival Score,Survived prediction
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,22,0,0.297481,0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,38,7,0.506515,1
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,26,0,0.345576,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,35,5,0.540117,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,35,0,0.299684,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,27,1,0.392380,0
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,19,3,0.441400,1
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,101.0,1,2,W./C. 6607,23.4500,,S,101,2,0.385679,0
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,26,3,0.344767,0


In [8]:
output = final_calcul[['PassengerId','Survived']]
output

Unnamed: 0,PassengerId,Survived
0,1,0
1,2,1
2,3,1
3,4,1
4,5,0
...,...,...
886,887,0
887,888,1
888,889,0
889,890,1


In [9]:
output.to_csv('train_02.csv',index=False)