# Surviving on Mars


Goal: Determine the best location for a human colony on Mars.

In [None]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from sklearn.cluster import KMeans
from sklearn import metrics

## Data

In [None]:
# Read the flattened map table from the Kaggle input directory, then preview
# its first rows.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/surviving-mars-maps/MapData-Evans-GP-Flatten.csv"
)
df.head()

In [None]:
# Encode boolean flags as integers (True -> 1, False -> 0), editing the frame
# in place.
df.replace(to_replace=True, value=1, inplace=True)
df.replace(to_replace=False, value=0, inplace=True)

# Give the two coordinate columns names without the degree sign.
df.rename(
    columns={
        'Latitude °': 'Latitude_d',
        'Longitude °': 'Longitude_d',
    },
    inplace=True,
)

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) after the
# recoding and renaming done in the previous cell.
df.info()

### Data Wrangling

Renaming and discretizing columns.

Let's assume that every boolean column is good if it's True and bad if not. *Judging by the column names, this is a reasonable assumption.*

In [None]:
# Row-wise total of every column from position 18 onwards, stored as
# 'Goodness'; then show which distinct totals occur.
df['Goodness'] = df.iloc[:, 18:].sum(axis=1)
pd.unique(df['Goodness'])

The sum of all the boolean variables is 17 for every location, so these columns give no information about which location is better than another.

In [None]:
# Encode the ordinal columns as small integer levels (0/1 = mildest).
# NOTE(review): replace() leaves any 'Difficulty Challenge' value that is not
# listed below untouched -- confirm the data only contains these six values.
df.replace({'Topography': {'Relatively Flat': 1,
                           'Steep': 2,
                           'Rough': 3,
                           'Mountainous': 4},
            'Difficulty Challenge': {100: 0,
                                     140: 1,
                                     180: 2,
                                     200: 3,
                                     220: 4,
                                     240: 5}},
           inplace=True)

# Altitude levels (upper edge inclusive):
#   <= 0 -> 0, (0, 5000] -> 1, (5000, 10000] -> 2, (10000, 15000] -> 3, > 15000 -> 4
# np.digitize works on the whole column at once, so it does not depend on the
# index being 0..n-1 the way a row-by-row df.at loop does. Missing values land
# in the last level (4), as they did with the original if/elif/else chain.
ALTITUDE_EDGES = [0, 5e3, 10e3, 15e3]
df['Altitude'] = np.digitize(df['Altitude'], ALTITUDE_EDGES, right=True)

# Temperature levels (colder = more dangerous):
#   >= -20 -> 0, [-40, -20) -> 1, [-60, -40) -> 2, [-80, -60) -> 3, < -80 -> 4
# The previous chain tested e.g. "> -20 and <= -40", which can never be true,
# so levels 1-3 were unreachable and everything colder than -20 became 4.
# Negating the temperature turns the bands into ascending edges for digitize.
# Because this corrects the levels, 'Danger' and everything derived from it
# (clustering, cluster-based selections) will differ from earlier runs.
TEMPERATURE_EDGES = [20, 40, 60, 80]
df['Temperature'] = np.digitize(-df['Temperature'], TEMPERATURE_EDGES, right=True)

Let's assume now that every column whose values fall in [0, 1, 2, 3, 4, 5] is graded from not dangerous (0) to extremely dangerous (the highest value).

In [None]:
# Danger score: row-wise sum of the columns at positions 5 through 15.
df['Danger'] = df.iloc[:, 5:16].sum(axis=1)

## Visualization

In [None]:
# Draw every location on an orthographic globe, coloured by its Danger score.
fig = px.scatter_geo(
    df,
    lat='Latitude_d',
    lon='Longitude_d',
    color='Danger',
    hover_name='Map Name',
    hover_data=['Danger', 'Altitude', 'Temperature'],
    projection='orthographic',
    fitbounds='locations',
    basemap_visible=False,
)
fig.update_layout(title='Danger Scatter')
fig.show()

## Clustering

In [None]:
# Single-column frame holding the only feature used for clustering.
df_c = df.loc[:, ['Danger']]

In [None]:
# Partition the Danger scores into 20 groups with a fixed seed, then report
# how many locations fall under each label.
clust_esp = KMeans(n_clusters=20, random_state=42).fit_predict(df_c)
unique, counts = np.unique(clust_esp, return_counts=True)
print(
    " Cluster label along its number of elements in it:\n",
    [*zip(unique, counts)],
)

## Best locations

In [None]:
# Store each location's cluster label next to its data, then display the table
# ordered from lowest to highest Danger.
df['Clust'] = clust_esp.tolist()
df.sort_values(by='Danger')

In [None]:
# KMeans numbers its clusters arbitrarily, so a literal label (previously 13)
# is not guaranteed to be the safest group once the data, the binning or the
# scikit-learn version changes. Select the cluster with the lowest mean Danger.
best_cluster = df.groupby('Clust')['Danger'].mean().idxmin()

df1 = df[df['Clust'] == best_cluster]  # locations in the least dangerous cluster
df2 = df[df['Clust'] != best_cluster]  # every other location (plot background)

In [None]:
# Foreground: the selected locations, coloured by Danger on the 'solar' scale.
fig = px.scatter_geo(
    df1,
    lat='Latitude_d',
    lon='Longitude_d',
    color='Danger',
    hover_name='Map Name',
    hover_data=['Danger', 'Altitude', 'Temperature'],
    title='Best locations to start a colony',
    projection='orthographic',
    color_continuous_scale='solar',
    opacity=0.5,
    fitbounds='locations',
    basemap_visible=False,
)

# Background: all remaining locations as faint gray dots with no hover or
# legend entry.
fig.add_trace(
    go.Scattergeo(
        lat=df2['Latitude_d'],
        lon=df2['Longitude_d'],
        marker=go.scattergeo.Marker(color='gray'),
        line=None,
        name='',
        opacity=0.1,
        hoverinfo='skip',
        showlegend=False,
    )
)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
print('Best locations to start a colony')
fig.show()

These are the least dangerous zones overall. I would start there. 

If a specific configuration is required, like "I want Alien imprints, Neo-concrete, Plasma Rocket and Wireless Power" you should be able to find one non-dangerous point among all points in the 'Less dangerous cluster'.

### Specific configuration

In [None]:
def configuration(params, cluster=None):
    """Plot and return the low-danger locations that have every requested feature.

    Reads the notebook-level frame ``df``, which must already contain the
    'Clust' and 'Danger' columns.

    Parameters
    ----------
    params : str or iterable of str
        Column name(s) of ``df`` that must equal 1 for a location to qualify.
        A single string is treated as a one-element list. An empty list keeps
        the whole cluster.
    cluster : optional
        Label in ``df['Clust']`` to search in. When omitted, the cluster with
        the lowest mean 'Danger' is used; KMeans labels are arbitrary, so a
        fixed label such as 13 is not guaranteed to be the safest one.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` in the chosen cluster where every requested column is 1.

    Raises
    ------
    KeyError
        If a requested name is not a column of ``df``.
    """
    wanted = [params] if isinstance(params, str) else list(params)

    # Fail early with the offending names rather than on the first bad lookup.
    unknown = [name for name in wanted if name not in df.columns]
    if unknown:
        raise KeyError('Unknown configuration parameter(s): {}'.format(unknown))

    if cluster is None:
        cluster = df.groupby('Clust')['Danger'].mean().idxmin()

    # Boolean mask: in the chosen cluster AND every requested feature present.
    keep = df['Clust'] == cluster
    for name in wanted:
        keep = keep & (df[name] == 1.0)

    df_non_d = df[keep]
    # The complement via the mask also behaves correctly with duplicate index labels.
    df_other = df[~keep]

    fig = px.scatter_geo(df_non_d, lat='Latitude_d', lon='Longitude_d', color='Danger',
                         hover_name='Map Name', hover_data=['Danger', 'Altitude', 'Temperature'],
                         fitbounds='locations', basemap_visible=False, opacity=0.5,
                         color_continuous_scale='solar', projection='orthographic')
    # Remaining locations as a faint gray background without hover or legend entry.
    fig.add_trace(go.Scattergeo(lat=df_other['Latitude_d'], lon=df_other['Longitude_d'],
                                marker=go.scattergeo.Marker(color='gray'), line=None, name='',
                                showlegend=False, hoverinfo='skip',
                                opacity=0.1))
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    print('Best locations to start a colony with specific configuration\nConfiguration: {}'.
          format(params))
    fig.show()
    return df_non_d

In [None]:
# Show the selectable feature columns: everything from position 18 up to, but
# not including, the last three columns of the frame.
available_params = df.columns[18:-3].tolist()
print('List of parameters: ', available_params, sep='\n')
print('-------------------------------------------------------------------')
print('Copy and Paste parameters of configuration and make a list of them')

In [None]:
# Example: locations in the safest cluster that offer all four of these features.
params = [
    'Alien Imprints',
    'Neo-Concrete',
    'Plasma Rocket',
    'Wireless Power',
]
configuration(params)