# Importing Libraries

In [None]:
import pandas as pd
import os
import plotly.express as px 
import datetime as dt

import plotly.offline as offline
import plotly.graph_objs as go

# Initialise Plotly's offline notebook mode so that figures passed to
# offline.iplot() later in the notebook can render inline.
offline.init_notebook_mode()

import numpy as np

import matplotlib

# Data shaping

In [None]:
# --- Configuration ---------------------------------------------------------
DATA_DIR = '../input/among-us-dataset'
SEED = 42             # fixed seed so the sampled subsets (and the plots) are reproducible
N_IMP_SAMPLE = 200    # rows drawn per Imposter outcome (win / loss)
N_CREW_SAMPLE = 780   # rows drawn per Crewmate outcome (win / loss)


def _duration_to_seconds(durations):
    """Convert a Series of strings such as "07m 04s" into float seconds.

    Raises if any value does not contain a "<digits>m <digits>s" pattern,
    so malformed rows are not silently turned into NaN.
    """
    parts = durations.str.extract(r'(\d+)m\s*(\d+)s').astype(int)
    return (parts[0] * 60 + parts[1]).astype(float)


# --- Data acquisition and merge -------------------------------------------
# Sort the listing (os.listdir order is arbitrary) and skip non-CSV entries
# so the concatenated frame is the same on every run.
files = sorted(f for f in os.listdir(DATA_DIR) if f.lower().endswith('.csv'))
df = pd.concat(
    [pd.read_csv(os.path.join(DATA_DIR, f)) for f in files],
    ignore_index=True,
)

# --- Replace the "-" placeholder and fix dtypes ---------------------------
# Assign the result back instead of calling replace(..., inplace=True) on a
# column selection: that chained form does not update `df` under pandas
# Copy-on-Write and is deprecated in pandas 2.x.
for column in ["Task Completed", "Imposter Kills"]:
    df[column] = df[column].replace("-", 0).astype(int)

df["Time to complete all tasks"] = (
    df["Time to complete all tasks"].replace("-", "00m 00s")
)

# Durations are stored as text ("MMm SSs"); convert them to seconds.
for column in ["Game Length", "Time to complete all tasks"]:
    df[column] = _duration_to_seconds(df[column])

# --- Derived columns ------------------------------------------------------
# NOTE: rows with zero kills produce inf here (division by zero) and are
# left as-is, matching the original behaviour.
df["kill_pace"] = df["Game Length"] / df["Imposter Kills"]

# When no "time to complete all tasks" was recorded (0 seconds), fall back
# to the full game length.
all_tasks_time = df["Time to complete all tasks"]
df["task_time"] = all_tasks_time.where(all_tasks_time != 0, df["Game Length"])
df["task_pace"] = df["task_time"] / df["Task Completed"]

# Seconds per fixed sabotage; games with zero fixed sabotages give inf,
# which is mapped to 0 so they can be filtered out later.
df["sabo_pace"] = (df["Game Length"] / df["Sabotages Fixed"]).replace(float("inf"), 0)

# --- Split data -----------------------------------------------------------
is_crew = df["Team"] == "Crewmate"
is_imp = df["Team"] == "Imposter"
is_win = df["Outcome"] == "Win"
is_loss = df["Outcome"] == "Loss"

df_crew = df[is_crew]
df_imp = df[is_imp]

# Equal-sized samples per outcome so win/loss histograms are comparable.
# sample() raises if a group has fewer rows than requested.
df_imp_win = df[is_imp & is_win].sample(n=N_IMP_SAMPLE, random_state=SEED)
df_imp_loss = df[is_imp & is_loss].sample(n=N_IMP_SAMPLE, random_state=SEED)
df_crew_win = df[is_crew & is_win].sample(n=N_CREW_SAMPLE, random_state=SEED)
df_crew_loss = df[is_crew & is_loss].sample(n=N_CREW_SAMPLE, random_state=SEED)


df.head()

# Make histogram (definition)

In [None]:
def make_histogram(win_data, loss_data, title_, xtitle, ytitle,
                   win_name="Imposter_win", loss_name="Imposter_loss"):
    """Plot two histograms (win vs. loss) in a single Plotly figure.

    Parameters
    ----------
    win_data, loss_data : array-like
        Values to bin for the first and second trace respectively.
    title_ : str
        Figure title.
    xtitle, ytitle : str
        Axis titles.
    win_name, loss_name : str, optional
        Legend labels for the two traces. The defaults keep the original
        imposter-centric labels, so existing calls are unaffected.

    Returns
    -------
    Whatever ``offline.iplot`` returns; the figure is rendered as a side
    effect.
    """
    traces = [
        go.Histogram(
            x=win_data,
            name=win_name,
            marker=dict(color='#33D7E9'),
            opacity=0.75,
        ),
        go.Histogram(
            x=loss_data,
            name=loss_name,
            marker=dict(color='#EB89B5'),
            opacity=0.75,
        ),
    ]

    layout = go.Layout(
        title=title_,
        xaxis=dict(title=xtitle),
        yaxis=dict(title=ytitle),
    )

    return offline.iplot(dict(data=traces, layout=layout))

# Optimal Game Length to win

In [None]:
# Build imposter-centric series: a Crewmate loss is counted together with
# Imposter wins, and a Crewmate win together with Imposter losses.
df_imp_win_len = pd.concat(
    [df_imp_win["Game Length"], df_crew_loss["Game Length"]], axis=0
)
df_imp_loss_len = pd.concat(
    [df_imp_loss["Game Length"], df_crew_win["Game Length"]], axis=0
)
# Title spelling fixed ("Inposter" -> "Imposter").
make_histogram(df_imp_win_len, df_imp_loss_len, "Imposter Game Length", "time(second)", "battle count")

Imposters tend to lose when a game lasts longer than 15 minutes.

# Optimal kill pace to win

In [None]:
# kill_pace is seconds of game time per imposter kill.
# Title spelling fixed ("Inposter" -> "Imposter").
make_histogram(
    df_imp_win["kill_pace"],
    df_imp_loss["kill_pace"],
    "Imposter Kill Time",
    "time(second)",
    "battle count",
)

If imposters don't kill at least once every 5 minutes, they are more likely to lose.

# Task pace to win

In [None]:
# Crewmate losses go in the "win" slot because make_histogram labels its
# traces from the imposter's point of view.
crew_loss_task_pace = df_crew_loss["task_pace"]
crew_win_task_pace = df_crew_win["task_pace"]
make_histogram(
    win_data=crew_loss_task_pace,
    loss_data=crew_win_task_pace,
    title_="Crewmate Task Pace",
    xtitle="time(second)",
    ytitle="battle count",
)

The pace of completing tasks has little to do with winning or losing.

# Sabotage Pace to win

In [None]:
# Keep only rows with a non-zero sabotage pace (inf was mapped to 0 when
# the column was built, so 0 marks games with no fixed sabotage).
crew_loss_has_pace = df_crew_loss["sabo_pace"] != 0
crew_win_has_pace = df_crew_win["sabo_pace"] != 0

# Named from the imposter's perspective: a crewmate loss is an imposter win.
sabo_pace_win = df_crew_loss.loc[crew_loss_has_pace, "sabo_pace"]
sabo_pace_loss = df_crew_win.loc[crew_win_has_pace, "sabo_pace"]

make_histogram(
    sabo_pace_win,
    sabo_pace_loss,
    "Crewmate Sabotages Fixed Pace",
    "time(second)",
    "battle count",
)

If imposters do not sabotage at a pace of more than the number of people in 15 minutes, imposters are more likely to lose.