# 1. Import libraries and data

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [6]:
# Directory that holds the project's data folders. It defaults to the original
# location and can be overridden with the UN_MISSIONS_DATA_DIR environment
# variable, so the notebook also runs on machines with a different layout.
path = os.environ.get('UN_MISSIONS_DATA_DIR', '/Users/tknoepfli/Documents/GitHub/UN-Peace-Missions/Data')
# Load the prepared UN missions table from <path>/Prepared Data/un_missions.csv
df = pd.read_csv(os.path.join(path, 'Prepared Data', 'un_missions.csv'))

# 2. Clean data

In [7]:
# Preview the first five rows to confirm the file loaded as expected
df.head(n=5)

Unnamed: 0,mission_id,mission_abbrev,mission_fullname,mission_type,mission_location,gw_country,gwno_1,gwno_2,gwno_3,gwno_4,...,version,Unnamed: 68,Unnamed: 69,Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75,Unnamed: 76
0,1,UNTSO,United Nations Truce Supervision Organization,PKO,Israel/Palestine,ISR,666,,,,...,UNPMM_1.5,,,,,,,,,
1,1,UNTSO,United Nations Truce Supervision Organization,PKO,Israel/Palestine,ISR,666,,,,...,UNPMM_1.5,,,,,,,,,
2,1,UNTSO,United Nations Truce Supervision Organization,PKO,Israel/Palestine,ISR,666,,,,...,UNPMM_1.5,,,,,,,,,
3,1,UNTSO,United Nations Truce Supervision Organization,PKO,Israel/Palestine,ISR,666,,,,...,UNPMM_1.5,,,,,,,,,
4,1,UNTSO,United Nations Truce Supervision Organization,PKO,Israel/Palestine,ISR,666,,,,...,UNPMM_1.5,,,,,,,,,


In [8]:
# Keep every column up to and including 'version'; everything to its right is dropped
version_pos = df.columns.get_loc('version')
df = df.iloc[:, :version_pos + 1]

In [9]:
# Lift pandas' column display limit so wide frames are shown without '...' truncation
pd.options.display.max_columns = None

In [21]:
# Print the frame summary (index range, per-column non-null count and dtype)
# to see which columns hold missing values or need a dtype conversion
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1013 entries, 0 to 1012
Data columns (total 68 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   mission_id           1013 non-null   int64         
 1   mission_abbrev       1013 non-null   object        
 2   mission_fullname     1013 non-null   object        
 3   mission_type         1013 non-null   object        
 4   mission_location     1013 non-null   object        
 5   gw_country           1013 non-null   object        
 6   gwno_1               1013 non-null   int64         
 7   gwno_2               391 non-null    float64       
 8   gwno_3               24 non-null     float64       
 9   gwno_4               16 non-null     float64       
 10  gwno_5               16 non-null     float64       
 11  mission_regionclass  1013 non-null   object        
 12  region_code          1013 non-null   int64         
 13  ucdp_link            1013 non-nul

In [11]:
# Count the missing (NaN) entries in each column
df.isna().sum()

mission_id            0
mission_abbrev        0
mission_fullname      0
mission_type          0
mission_location      0
                   ... 
max18               162
ocat01              162
mission_calc        222
mission_class       222
version               0
Length: 68, dtype: int64

In [12]:
# Count fully duplicated rows (each repeat after the first occurrence counts once)
df.duplicated(keep='first').sum()

0

In [13]:
# Collect the duplicated rows themselves; a first dimension of 0 in the shape
# means the table contains no repeated rows
is_repeat = df.duplicated()
df_dups = df.loc[is_repeat]
df_dups.shape

(0, 68)

In [14]:
# Check for mixed-type columns: print the name of every column whose values
# are not all of one Python type (for instance text alongside float NaN).
# Series.map(type) is used because DataFrame.applymap is deprecated since
# pandas 2.1; counting distinct types also avoids indexing row 0, which
# would fail on an empty frame.
for col in df.columns:
    if df[col].map(type).nunique() > 1:
        print(col)

mission_startdate
mission_enddate
mission_class


In [15]:
# Peek at the first five values of the three columns flagged as mixed-type
flagged_cols = ['mission_startdate', 'mission_enddate', 'mission_class']
df.loc[:, flagged_cols].head()

Unnamed: 0,mission_startdate,mission_enddate,mission_class
0,29.05.1948,31.12.2020,
1,29.05.1948,31.12.2020,
2,29.05.1948,31.12.2020,
3,29.05.1948,31.12.2020,
4,29.05.1948,31.12.2020,


In [16]:
# Look up the stored dtype of each of the three flagged columns
df.dtypes.loc[['mission_startdate', 'mission_enddate', 'mission_class']]

mission_startdate    object
mission_enddate      object
mission_class        object
dtype: object

In [17]:
# Convert 'mission_startdate' and 'mission_enddate' from text to datetime.
# The values are written day-first as DD.MM.YYYY (e.g. 29.05.1948), so the
# format is spelled out: without it an ambiguous date such as 01.02.2000 can be
# read month-first, and infer_datetime_format is deprecated since pandas 2.0.
# Missing entries become NaT; a value that does not match the format raises
# an error instead of being silently mis-parsed.
for date_col in ['mission_startdate', 'mission_enddate']:
    df[date_col] = pd.to_datetime(df[date_col], format='%d.%m.%Y')

# 3. Explore data

In [18]:
# Pairwise correlations (pandas' default Pearson method) between the numeric columns.
# The frame also holds text and datetime columns; since pandas 2.0 DataFrame.corr
# no longer drops those silently and raises instead, so the numeric/bool columns
# are selected explicitly before correlating.
df.select_dtypes(include=['number', 'bool']).corr()

Unnamed: 0,mission_id,gwno_1,gwno_2,gwno_3,gwno_4,gwno_5,region_code,ucdp_link,ucdpconflict_id1,ucdpconflict_id2,ucdpconflict_id3,ucdpconflict_id4,mission_yearest,task_yearest,min01,min02,min03,min04,min05,min06,min07,min08,min09,min10,min11,mod01,mod02,mod03,mod04,mod05,mod06,mod07,mod08,mod09,mod10,mod11,max01,max02,max03,max04,max05,max06,max07,max08,max09,max10,max11,max12,max13,max14,max15,max16,max17,max18,ocat01,mission_calc
mission_id,1.0,0.023791,0.0428,0.628655,1.0,1.0,0.359009,-0.062512,0.254583,0.504767,0.61481,0.481941,0.856128,0.556065,0.131513,-0.032504,0.037381,0.023926,0.101311,-0.014068,-0.041581,-0.043765,0.023374,0.074303,0.047845,0.11238,0.068084,0.05283,0.026595,0.00745,0.053034,-0.007668,0.068731,0.126444,0.122029,0.109608,0.113062,0.052515,0.060757,0.051267,0.03999,0.116643,0.101197,0.048668,0.049577,0.096313,0.054757,0.094646,0.14518,0.106946,0.092352,0.077551,0.065729,0.167696,0.053798,0.286683
gwno_1,0.023791,1.0,0.868495,0.997974,1.0,1.0,-0.235911,0.278386,0.087643,0.346834,0.056251,-0.273727,-0.099663,0.076525,-0.022605,0.026903,0.018724,0.048437,-0.024054,0.035789,0.007586,-0.096836,0.011634,-0.027089,-0.030748,0.008066,-0.038497,-0.045766,-0.005203,0.047707,-0.04268,-0.008835,-0.025127,-0.008399,-0.003194,-0.015416,-0.031197,0.007492,-0.011183,0.022652,0.03055,0.020966,-0.05422,0.004634,0.008959,-0.081048,0.010464,-0.132267,-0.031904,0.017413,-0.117717,-0.062487,0.027326,-0.029146,-0.023807,-0.242209
gwno_2,0.0428,0.868495,1.0,1.0,1.0,1.0,-0.218725,0.599733,-0.021136,0.254651,0.11528,,-0.253888,0.036782,0.100517,-0.085632,-0.055214,0.054209,-0.117703,0.02917,-0.058688,-0.058487,0.014134,,0.033122,-0.043626,0.035292,0.073598,0.154703,0.101494,-0.094169,0.024687,-0.104849,0.000992,-0.048106,0.001637,0.032895,-0.021753,0.128679,,0.024687,0.140513,0.085785,0.024687,,0.002727,-0.03189,-0.104849,,0.109189,0.105556,0.064166,0.065831,0.092145,0.048307,0.183417
gwno_3,0.628655,0.997974,1.0,1.0,1.0,1.0,-0.962126,-0.772761,1.0,,,,0.558382,0.661894,-0.089642,,,,-0.089642,0.164957,,,,,,0.164957,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0
gwno_4,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,,,,,1.0,0.887641,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
gwno_5,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,,,,,1.0,0.887641,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
region_code,0.359009,-0.235911,-0.218725,-0.962126,-1.0,-1.0,1.0,-0.116046,0.150128,0.590745,0.599302,0.787545,0.424101,0.105853,0.118568,-0.053707,0.058312,-0.034223,0.10923,0.039213,0.047397,0.045093,0.037906,0.081079,0.088374,0.038226,0.139753,0.109277,0.071344,0.058556,0.086398,0.003868,0.007939,0.095782,0.063366,0.095782,0.095669,0.051486,0.097508,0.058091,-0.009113,0.110072,0.11578,0.016467,0.062794,0.132316,0.036345,0.134789,0.100309,0.089431,0.164783,0.096467,0.018037,0.120615,0.089776,0.371068
ucdp_link,-0.062512,0.278386,0.599733,-0.772761,-1.0,-1.0,-0.116046,1.0,0.011257,,,,-0.133724,-0.066802,0.025802,0.047351,0.057184,0.022136,0.004294,0.01427,0.05812,0.054875,0.060504,0.033303,0.042506,-0.034432,0.043843,0.037544,0.036862,0.049973,-0.008302,0.048678,-0.006059,0.047351,0.029336,0.047351,0.027102,0.004374,0.017908,-0.010346,0.029336,0.016117,0.062336,0.04166,0.033303,0.050146,0.043148,0.051238,0.043148,0.04459,0.037544,0.055927,0.040121,0.035336,0.052475,0.06113
ucdpconflict_id1,0.254583,0.087643,-0.021136,1.0,,,0.150128,0.011257,1.0,0.35516,0.402859,0.38013,0.191606,0.190715,-0.005375,0.10301,0.073083,0.078897,-0.000503,-0.001323,0.007821,-0.03597,-0.004523,0.041051,0.027532,0.03751,0.024996,-0.017842,-0.024177,-0.031797,0.039881,0.011346,-0.021172,0.012711,-0.019285,0.057697,0.006143,0.028427,0.033528,0.045588,-0.019638,0.032808,0.004228,0.023317,0.040041,0.017707,0.067152,0.007957,0.020361,0.064541,0.013181,0.01298,0.025441,0.034939,0.046174,0.056844
ucdpconflict_id2,0.504767,0.346834,0.254651,,,,0.590745,,0.35516,1.0,0.995364,0.999998,0.480261,0.481338,0.076824,-0.081454,0.017938,,-0.0119,0.091754,-0.011415,-0.115704,0.078135,0.099927,0.017879,0.068052,0.016879,0.037622,0.09996,-0.116963,0.037689,-0.14466,0.090189,0.016879,0.068052,0.068052,0.011894,-0.083226,0.011752,,-0.083424,0.129262,-0.021965,0.011752,-0.083226,-0.021965,-0.075191,0.01187,0.129262,0.01187,0.017938,-0.06689,0.038842,0.016879,-0.037753,-0.101841
