In [11]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

In [12]:
test = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')
gender = pd.read_csv('gender_submission.csv')

In [13]:
def surviving_score(train):
    # Define the parameters to link together
    classes = train['Pclass'].unique()
    sexes = ['female', 'male']
    
    link_param_score = {}
    
    # Calculate linked parameter score
    for pclass in classes:
        for sex in sexes:
            survival_rate = train[(train['Pclass'] == pclass) & (train['Sex'] == sex)]['Survived'].mean()
            link_param_score[f'{sex} in class {pclass}'] = survival_rate

    return link_param_score

In [14]:
def final_calcul(train, link_param_score):
    influence_params = ['Sex', 'Pclass']
    
    # Create age and fare groups if not already present
    train['Age'] =  train['Age'].fillna(value = 101 )
    train['Age Group'] = pd.cut(train['Age'], bins=list(range(0, 103)), labels=False, right=False)
    train['Fare Group'] = pd.cut(train['Fare'], bins=list(range(0, 5000, 10)), labels=False, right=False)

    # Calculate composite survival score for each passenger
    for index, row in train.iterrows():
        scores = []
        for param in influence_params:
            # Find the survival rate for the passenger's category in each parameter
            if param == 'Sex':
                category = row[param]
                key = f"{category} in class {row['Pclass']}"
                survival_rate = link_param_score.get(key, 0)
                scores.append(survival_rate)
        
        # Calculate the average score
        avg_score = sum(scores) / len(scores) if scores else 0
        train.at[index, 'Survival Score'] = avg_score

    # Determined survival prediction
    threshold = 0.3838
    train['Survived'] = train['Survival Score'].apply(lambda x: 1 if x >= threshold else 0)
    
    return train

In [15]:
link_param_score = surviving_score(train)
train = final_calcul(train, link_param_score)

train


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Age Group,Fare Group,Survival Score
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,22,0,0.135447
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,38,7,0.968085
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,26,0,0.500000
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,35,5,0.968085
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,35,0,0.135447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,27,1,0.157407
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,19,3,0.968085
888,889,1,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,101.0,1,2,W./C. 6607,23.4500,,S,101,2,0.500000
889,890,0,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,26,3,0.368852


In [16]:
train

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Age Group,Fare Group,Survival Score
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,22,0,0.135447
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,38,7,0.968085
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,26,0,0.500000
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,35,5,0.968085
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,35,0,0.135447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,27,1,0.157407
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,19,3,0.968085
888,889,1,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,101.0,1,2,W./C. 6607,23.4500,,S,101,2,0.500000
889,890,0,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,26,3,0.368852


In [17]:
final_calcul = final_calcul(test, link_param_score)

In [18]:
output = final_calcul[['PassengerId','Survived']]
output.to_csv('train_04.csv',index=False)