# Import Data

In [43]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Change the column width so more text will show
pd.options.display.max_colwidth = 500

# Import data from quakemaps.org
data = pd.read_csv('1440041919csv.csv')

# Check that the data has been correctly imported
data

Unnamed: 0,#,INCIDENT TITLE,INCIDENT DATE,LOCATION,DESCRIPTION,CATEGORY,LATITUDE,LONGITUDE,Phone Number,Most Affected District,...,EMAIL,APPROVED,VERIFIED,ACTIONABLE,URGENT,ACTION TAKEN,CLOSED,ACTION SUMMARY,COMMENT,COMMENT DATE
0,2488,20 tents was sent to Kokali Okhaldhunga,7/22/2015 16:03,"Kokali, Okhaldhunga","May 27, 20 tents was sent to Kokali Okhaldhunga","Distribution Area, Other,",27.333000,86.417000,,Okhaldhunga,...,,YES,YES,Unactionable,NO,YES,YES,,,
1,2487,Relief distributed,7/22/2015 15:58,"Bishal Nagar Marg, Handigaon, Kathmandu, Bagmati, Central Region, Nepal, 44600","May 27, 10 kgs of rice, 1 sacks of daal and clothes was sent to a orphanage at Bishalnagar Kathmandu.","Food, Distribution Area, Other,",27.718147,85.334849,,Kathmandu,...,,YES,YES,Unactionable,NO,NO,YES,,,
2,2486,Relief distributed,7/22/2015 15:54,"Lamidada, Khotang","May 27, Act4Quake team dispatched tents of Rs 1 lakh 50 thousands to Lamidanda Khotang.","Distribution Area, Shelter, Other,",27.248000,86.672000,,Other,...,,YES,YES,Unactionable,NO,YES,YES,,,
3,2485,Relief distributed,7/22/2015 15:45,"Boch, Dolakha","May 28, Youth team report back from Boach Dolakha after distributing 4500 kgs of rice, 90 pcs of blankets, 90 tents, 75 pcs of baby food, 500 kg of lentils, 4 cartoons of cooking oil to 147 affected families.","Other,",27.684000,85.988000,,Dolakha,...,,YES,NO,Unactionable,NO,YES,YES,,,
4,2484,Relief distributed,7/22/2015 15:41,"Bankhu, Kavre","Youth volunteers report back from Bankhu Kavre after distributing tents, rice, blankets, daal, kerau and bitten rice to 76 households","Distribution Area, Other,",27.364000,85.658000,,Kavrepalanchok,...,,YES,YES,Unactionable,NO,YES,YES,,,
5,2482,Relief distributed,7/22/2015 15:23,"Dhulikhel, Bagmati, Central Region, Nepal","May 28, Act4Quake team was reported back from Dhulikhel Municipality office for supporting 40 tents, 600 kg of rice, 72 pcs of soap and 16 pcs of blankets for relief operations.","Distribution Area, Other,",27.616667,85.550000,,Kavrepalanchok,...,,YES,YES,Unactionable,NO,YES,YES,,,
6,2481,Relief distributed,7/22/2015 15:21,"Gorkha, Gandaki, Western Region, Nepal","May 28, #Act4Quake youth team returned back from Gorkha after distributing 1500 kgs of rice, 250 kgs of daal, besar, sanitary pads and beaten rice to 125 households.","Distribution Area, Other,",28.000000,84.633333,,Gorkha,...,,YES,YES,Unactionable,NO,YES,YES,,,
7,2480,Relief distributed,7/22/2015 14:43,"Sundrawati, Dolakha",Act4Quake team dispatched 100 tents and beaten rice to sundrawati VDC of Dolakha district on May 29.,"Distribution Area, Other,",27.714000,86.074000,,Dolakha,...,,YES,YES,Unactionable,NO,YES,YES,,,
8,2479,Relief distibuted by AYON,7/22/2015 14:40,"Jorpati, Bagmati, Central Region, Nepal, 44600","2 pieces Tents, 4 sacks of Rice, 1 sack Daal and 1 cartoon Cooking Oil were distributed to a visually impaired community of Jorpati, Kathmandu.","Distribution Area, Other,",27.720000,85.380000,,Kathmandu,...,,YES,YES,Unactionable,NO,YES,YES,"2 pieces Tents, 4 sacks of Rice, 1 sack Daal and 1 cartoon Cooking Oil were distributed to a visually impaired community of Jorpati, Kathmandu.",,
9,2478,Relief distributed,7/22/2015 14:35,"Bethan, Ramechhap",200 pieces tents and 10 boxes of sanitary pads were dipatched to Bethan VDC of Ramechhap district.,"Distribution Area, Other,",27.497000,85.829000,,Ramechhap,...,,YES,YES,Unactionable,NO,YES,YES,,,


# Basic counts of feature values

In [44]:
# The code below is to give a high-level understanding of how the data looks for specific features.

# Are all reports approved?
# Helpful to know whether any unapproved reports make it into the final database.
data['APPROVED'].value_counts()

YES    2035
dtype: int64

In [45]:
# Are all reports verified? 
data['VERIFIED'].value_counts()

NO     1058
YES     977
dtype: int64

In [46]:
# Given that not all reports are verified - are there cases where unverified reports still have action taken on them?
# Do a cross-tab table to find out.
pd.crosstab(data['VERIFIED'], data['ACTION TAKEN'])

ACTION TAKEN,NO,YES
VERIFIED,Unnamed: 1_level_1,Unnamed: 2_level_1
NO,1015,43
YES,619,358


In [47]:
# What's the actionable status breakdown?
data['ACTIONABLE'].value_counts()

Actionable      1293
Unactionable     742
dtype: int64

In [48]:
# What's the breakdown of reports that have had action taken on them?
data['ACTION TAKEN'].value_counts()

NO     1634
YES     401
dtype: int64

In [49]:
# Calculate percent of reports that have had action taken.
float(401)/(1634+401)

0.19705159705159705

In [50]:
# How many were closed?
data['CLOSED'].value_counts()

NO     1675
YES     360
dtype: int64

In [51]:
# Calculate percent of reports that were closed
float(360)/(360+1675)

0.1769041769041769

In [52]:
# How many different incident titles exist in our data?
data['INCIDENT TITLE'].describe()

count          2035
unique         1848
top       collapsed
freq             22
Name: INCIDENT TITLE, dtype: object

# Data Cleaning

How do I know whether a report is follow-up indicating that relief has been distributed?

1. Category column: this will include different categories a report's been tagged as. We'd want ones with "Distribution Area." Usually that occurs as the first or second category.

2. Incident title: "relief distributed" will feature in the incident title, to flag that this is a follow-up report about action taken. 

In [53]:
# Sample 50 rows at random to see what the prevalence of relief distribution reports are, potential identifiers of such reports
pd.options.display.max_colwidth = 500
sample = data.sample(n=50, replace=False)
sample

Unnamed: 0,#,INCIDENT TITLE,INCIDENT DATE,LOCATION,DESCRIPTION,CATEGORY,LATITUDE,LONGITUDE,Phone Number,Most Affected District,...,EMAIL,APPROVED,VERIFIED,ACTIONABLE,URGENT,ACTION TAKEN,CLOSED,ACTION SUMMARY,COMMENT,COMMENT DATE
1842,156,Durbar Square,4/28/2015 14:35,"Laxmi Narayan Sattal, Ashok Binayak Marg, Maru Tole, Basantapur Durbar Square, Gorkha, ????????, ?????? ?????, ?????????? ????? ???????, Kathmandu-24, ?????",&quot;Kathmandu’s Patan Durbar Square before and after the #NepalEarthquake\n{{https://pbs.twimg.com/media/CDdffmCWoAAXybF.jpg}}\n{{https://pbs.twimg.com/media/CDdffmCWMAE8c8E.jpg}}\n{{https://pbs.twimg.com/media/CDdffmEXIAAKJLj.jpg}}\nhttps://twitter.com/Fascinatingpics/status/592046884530855936&quot;,"Earthquake Damage,",27.704099,85.30608,,,...,punit@parewalabs.com,YES,NO,Unactionable,NO,NO,NO,,,
1723,872,Kaylyanpur-6 in Nuwakot village needs food,4/29/2015 11:10,"Kalyanpur, Bagmati, Central Region, Nepal, 44900",Food needed in Kalyanpur-6 in Nuwakot village. Contact 9813383384. HR191 https://www.facebook.com/photo.php?fbid=1639960476235379&amp;amp;set=a.1379164568981639.1073741825.100006643264186&amp;amp;type=1&amp;amp;theater,"Food,",27.937588,85.101248,9813383384.0,Nuwakot,...,nepal@humanityroad.org,YES,YES,Actionable,YES,NO,NO,,Tapailai call garda safal huna sakana plz yo sms paya paxi call garnu hola.,6/18/2015 11:31
1187,963,Relief Required,5/1/2015 17:35,"Mahadev Besi, Dhading","22 households, 150 people require food, water and tents.\n\nContact detail of the relief seeker given","Food, Shelter,",27.750224,85.068146,9803130060.0,Dhading,...,,YES,NO,Actionable,NO,NO,NO,,"The relief materials are provide by five different sources (V.D.C., Church (twice), unknown organizations) . Also the UNHRC provided Tarps. Initial needs are fulfilled. Recommended closure!",5/28/2015 15:05
1482,601,Global Shapers Distribution Areas - Gorkha,4/30/2015 10:15,"residential Area, Gorkha, Gandaki, Western Region, Nepal","VDCs in Gorkha supplied : Kera Bari, Bakran, Bhairung, Namjung, Thalajung\n\ntotal supply sent to 5 VDCs\na) 8 X 500g [BHUJIA]\nb) 50 ORS [NAVAJEEVAN]\nc) 12 X 25kg [CHIURA]\nd) 4 X 30pkts [INSTANT NOODLES]\ne) 5 [PIYUSH]\nf) 1 pack [GLOVES]\ng) 6 packs [MASKS]","Distribution Area,",28.121983,84.610579,9851024008.0,Gorkha,...,,YES,NO,Actionable,NO,NO,NO,,Not reachable. Yo message payepachi please call garnu hola.,5/28/2015 15:24
899,1290,"Tents and Food needed in Namdu, Dolakha",5/3/2015 14:42,"Namdu, Dolakha","Help has not yet reached Namdu village in Dolakha. At least 100 families are in urgent need of tents/tarps, rice, and salt, among other basic necessities. \n\nIf you can help, please contact Bijaya Lama at 9841228746.","Food, Shelter,",27.789108,86.241623,9841228746.0,Dolakha,...,,YES,YES,Actionable,YES,NO,YES,"(Jun 2 2015)\nTalked to Bijaya ji, all the requirements are almost fulfilled so will update report if needed anything.",Phone switched of,6/7/2015 17:16
1471,697,urgent need for tents/warm clothes in Kot Danda Lalitpur,4/30/2015 11:22,"Kot Danda, Bisankhu Narayan VDC, Laliitpur, Nepal","More than 90% houses destroyed at Kot Danda (hill top) Bishankhu Narayan VDC at Lalipur. Urgent need of tents and warm clothes, and some fooding. https://www.youtube.com/watch?v=AYCLVlyEmtw&amp;amp;feature=em-upload_owner","Other, Food, Shelter,",27.609749,85.38512,,Lalitpur,...,kotearthquake@gmail.com,YES,NO,Actionable,NO,NO,NO,,contact not available. please provide us the contact number.,6/1/2015 12:12
785,1418,"need help at Madanpur, Nuwakot",5/4/2015 14:51,"Nuwakot, Madanpur VDC, Ward No. 1","Madanpur VDC\nWard No. 1\nNuwakot\n15-16 people dead\n350 people in the ward\nTent, Food, Water needed. Running out of Food.","Food, Shelter,",27.85868,85.184223,9803889487.0,Nuwakot,...,,YES,YES,Actionable,NO,NO,NO,"There are 200 households. Every family received Rs 7000. But they need more help in terms of tent, food and water.","Not enough relief has reached the area. There are 200 households. Every family received Rs 7000. But they need more help in terms of tent, food and water.",5/4/2015 15:33
701,1506,Indv Support - Situ Sainju,5/5/2015 11:29,"Panauti, Kavrepalanchwok","- Kavre district specially villages in Panauti \n- Offering Tents, medicines, clothes, dry food\n- &quot;Health, Medical Camps\nFood (Rice, Oil, Noodles, etc)\nBuilding Safety Assessment; Planning for low cost building&quot;\n- &quot;Has medical equipment but","Medical Assiatance, Food, Water Sanitation and Hygiene, Shelter, Medical Team, Food Security or Water, Sanitation and Hygiene Organization, Shelter Organization,",27.583333,85.516667,9851145961.0,Kavrepalanchok,...,sainjusitu21@hotmail.com,YES,NO,Actionable,NO,NO,NO,,,
1661,343,Help wanted,4/29/2015 14:35,"Joshi Galli, Chittadhar Marg, Raktakali, Kathmandu","We 60-70 people are waiting for tent and food at Raktakali, Joshi Galli. its inside galli and no one is approaching. We are next to Hisila yemi maiti. Baburam and Hisila came there but no one has came yet to next their camp to us. Contact soon. Kamal, Hari, Sita, Kumar, Naresh.\n\nDipesh Joshi\n9841209649","Food, Shelter,",27.709402,85.30673,9841209649.0,Kathmandu,...,dipjoshi@gmail.com,YES,YES,Actionable,YES,YES,YES,Now we have managed by ourselves. Please [serve] to other needy in remote villages. Regards Dipesh Joshi,Thank you Dipesh. We are closing your report.,6/18/2015 16:28
1830,166,Food distribution at Amrit Bhog,4/28/2015 15:19,"Amrit Bhog Party Palace, Kalika Marga, Pushpa Nagar, Baansh Ghari, Gorkha, ????????, ?????? ?????, ?????????? ????? ???????, Kathmandu - 32, ?????",Food distribution at Amrit Bhog,"Distribution Area,",27.704315,85.326124,,,...,punit@parewalabs.com,YES,NO,Unactionable,NO,NO,NO,,,


In [54]:
# Calculate % of random sample that had relief distribution reports

float(5) / 50

0.1

In [55]:
# What does the category typically include for follow-up reports?
data.CATEGORY.head()

0             Distribution Area, Other, 
1       Food, Distribution Area, Other, 
2    Distribution Area, Shelter, Other, 
3                                Other, 
4             Distribution Area, Other, 
Name: CATEGORY, dtype: object

In [56]:
# Use the dot-split method to create a list out of the category text (instead of having it just be a string)
data['distarea_tag'] = data.CATEGORY.apply(lambda x:x.split(", "))

# REVISIT: add the column in next to the original CATEGORY column, so it's easy to visually check that this code has worked.

In [57]:
# Use the "in" function to determine if the list of category tags for each row includes "Distribution Area"
# If it does, then a "True" is returned into the distarea_tag_dummy column 
data['distarea_tag_dummy'] = data.distarea_tag.apply(lambda x: 'Distribution Area' in x)
data.head()

Unnamed: 0,#,INCIDENT TITLE,INCIDENT DATE,LOCATION,DESCRIPTION,CATEGORY,LATITUDE,LONGITUDE,Phone Number,Most Affected District,...,VERIFIED,ACTIONABLE,URGENT,ACTION TAKEN,CLOSED,ACTION SUMMARY,COMMENT,COMMENT DATE,distarea_tag,distarea_tag_dummy
0,2488,20 tents was sent to Kokali Okhaldhunga,7/22/2015 16:03,"Kokali, Okhaldhunga","May 27, 20 tents was sent to Kokali Okhaldhunga","Distribution Area, Other,",27.333,86.417,,Okhaldhunga,...,YES,Unactionable,NO,YES,YES,,,,"[Distribution Area, Other, ]",True
1,2487,Relief distributed,7/22/2015 15:58,"Bishal Nagar Marg, Handigaon, Kathmandu, Bagmati, Central Region, Nepal, 44600","May 27, 10 kgs of rice, 1 sacks of daal and clothes was sent to a orphanage at Bishalnagar Kathmandu.","Food, Distribution Area, Other,",27.718147,85.334849,,Kathmandu,...,YES,Unactionable,NO,NO,YES,,,,"[Food, Distribution Area, Other, ]",True
2,2486,Relief distributed,7/22/2015 15:54,"Lamidada, Khotang","May 27, Act4Quake team dispatched tents of Rs 1 lakh 50 thousands to Lamidanda Khotang.","Distribution Area, Shelter, Other,",27.248,86.672,,Other,...,YES,Unactionable,NO,YES,YES,,,,"[Distribution Area, Shelter, Other, ]",True
3,2485,Relief distributed,7/22/2015 15:45,"Boch, Dolakha","May 28, Youth team report back from Boach Dolakha after distributing 4500 kgs of rice, 90 pcs of blankets, 90 tents, 75 pcs of baby food, 500 kg of lentils, 4 cartoons of cooking oil to 147 affected families.","Other,",27.684,85.988,,Dolakha,...,NO,Unactionable,NO,YES,YES,,,,"[Other, ]",False
4,2484,Relief distributed,7/22/2015 15:41,"Bankhu, Kavre","Youth volunteers report back from Bankhu Kavre after distributing tents, rice, blankets, daal, kerau and bitten rice to 76 households","Distribution Area, Other,",27.364,85.658,,Kavrepalanchok,...,YES,Unactionable,NO,YES,YES,,,,"[Distribution Area, Other, ]",True


In [58]:
# Determine how many reports we tagged (that included Distribution Area)
data.distarea_tag_dummy.value_counts()

False    1927
True      108
dtype: int64

In [59]:
# Do a quick check of the tagged cases - first 50.
pd.options.display.max_colwidth = 500
data[['#', 'INCIDENT TITLE', 'DESCRIPTION', 'distarea_tag', 'distarea_tag_dummy']][data.distarea_tag_dummy == True].iloc[:50]

# A few false trues:
# 1950, 617, 303 (had relief needs + distribution area), 299, 182. 

Unnamed: 0,#,INCIDENT TITLE,DESCRIPTION,distarea_tag,distarea_tag_dummy
0,2488,20 tents was sent to Kokali Okhaldhunga,"May 27, 20 tents was sent to Kokali Okhaldhunga","[Distribution Area, Other, ]",True
1,2487,Relief distributed,"May 27, 10 kgs of rice, 1 sacks of daal and clothes was sent to a orphanage at Bishalnagar Kathmandu.","[Food, Distribution Area, Other, ]",True
2,2486,Relief distributed,"May 27, Act4Quake team dispatched tents of Rs 1 lakh 50 thousands to Lamidanda Khotang.","[Distribution Area, Shelter, Other, ]",True
4,2484,Relief distributed,"Youth volunteers report back from Bankhu Kavre after distributing tents, rice, blankets, daal, kerau and bitten rice to 76 households","[Distribution Area, Other, ]",True
5,2482,Relief distributed,"May 28, Act4Quake team was reported back from Dhulikhel Municipality office for supporting 40 tents, 600 kg of rice, 72 pcs of soap and 16 pcs of blankets for relief operations.","[Distribution Area, Other, ]",True
6,2481,Relief distributed,"May 28, #Act4Quake youth team returned back from Gorkha after distributing 1500 kgs of rice, 250 kgs of daal, besar, sanitary pads and beaten rice to 125 households.","[Distribution Area, Other, ]",True
7,2480,Relief distributed,Act4Quake team dispatched 100 tents and beaten rice to sundrawati VDC of Dolakha district on May 29.,"[Distribution Area, Other, ]",True
8,2479,Relief distibuted by AYON,"2 pieces Tents, 4 sacks of Rice, 1 sack Daal and 1 cartoon Cooking Oil were distributed to a visually impaired community of Jorpati, Kathmandu.","[Distribution Area, Other, ]",True
9,2478,Relief distributed,200 pieces tents and 10 boxes of sanitary pads were dipatched to Bethan VDC of Ramechhap district.,"[Distribution Area, Other, ]",True
11,2458,"Relief distributed in Bonpale, Luitel, Western Gorkha Region","Relief supplies delivered on May 16 2015 By Green Root Nepal to Bonpale, Luitel, Western Gorkha Region\n\nVillage: Bonpale, Luitel, Western Gorkha Region \n\ncontact: Pabitra 9804181428\n\nPeople: 70\ndamaged houses: 10\nTarps: 10 (18x24)\nRice: 15 bags (bag/25kg)\nDhal: 20KG\nSalt: 30KG\nSugar: 40KG\nPotatoes: 50KG\noil: 20L\nBlankets: 0\nMattress: 0","[Distribution Area, Other, ]",True


In [60]:
# Do a quick check of the tagged cases - first 50.
pd.options.display.max_colwidth = 500
data[['#', 'INCIDENT TITLE', 'DESCRIPTION', 'distarea_tag', 'distarea_tag_dummy']][data.distarea_tag_dummy == True].iloc[50:]

# 1271, 1259, 946, 886, 1049, 734, 617, 303, 299, 

Unnamed: 0,#,INCIDENT TITLE,DESCRIPTION,distarea_tag,distarea_tag_dummy
800,1393,FRIENDS OF NEPAL DISTRIBUTION AREA - BHAKTAPUR,"delivered on 29 April\n\nFoam (ft)\t6\tmattress\nWater\t18\tJugs\nRice (kg)\t480\t30kg sacks\nBlanket\t1\t\nMix beans (kg)\t10\t3 kidney beans, 7 chana\nFlour (kg)\t4\t\nGlucose\t8","[Distribution Area, Food Security or Water, Sanitation and Hygiene Organization, Shelter Organization, ]",True
907,1271,There are about 80 houses destructed by eq frm both nearby villages,there are about 80 houses destructed by eq frm both nearby villages . uniharulai tent haru chaieko xa basnu lai gham ra pani bata bachnu lai .. sabai ghar vatkiyeko xa hijo hami gako thyam kei carton chau chau 40 kg chiura liera ... tara tyo pani uniharu lai 2 to 3 days matra pugxa hola......yo gaun haruma ma aru manche pugdaina hola .. tha pani hunna hola so .. we have to share this post like we did for nallu n help came smile emoticon\nv.dc n ward numbers are given in the pic .\nso lets st...,"[Medical Facility, Food, Distribution Area, Shelter, Medical Team, Food Security or Water, Sanitation and Hygiene Organization, Shelter Organization, Other, ]",True
920,1259,need a vehicle support,we need a vehicle support for our team to take food and tents to the affected place,"[Distribution Area, ]",True
922,1256,Global Shapers Distribution Areas - Dhading,Delivered on April 30\n\nTents\t5\tpieces,"[Distribution Area, ]",True
923,1255,Global Shapers Distribution Areas - Sindhupalchok,Delivered on April 30\n\nSolar Panels from Gham Power\t2\tpanels\n\t\t\nBatteries\t4\tunits\n\t\t\nInvertor\t1\tunit\nCharger\t1\tunit,"[Distribution Area, ]",True
925,1253,"Global Shapers Distribution Areas - (Kathmandu) Mahankal, Kapan, Chunikhel",Tarps\t40\tpieces (12*6)\nRice\t3\t25kg bags\nDaal\t1\t25 kg bags,"[Distribution Area, ]",True
926,1252,"Need volunteers to go to Lapilang, Dolakha","A team of volunteers are heading to Lapilang, Dolakha to distribute immediate relief materials like rice, medicines and warm clothes. We have vehicle ready to leave tomorrow. We urgently need more volunteers to leave with us. Please contact the following number if you are interested in volunteering. \n\n9841135018","[Distribution Area, Other, ]",True
1013,1251,Global Shapers Distribution Areas - Rasuwa,Delivered on April 29\n\nCurtains/ Sheets\t24\t pieces\nRice\t2\t25 kg bags\nDaal\t3\t25 kg bags\nBiscuit\t1\tbox\nWater\t24\tpieces,"[Distribution Area, ]",True
1021,1156,Piyush and Jeevan Jal Available till 3:30 PM,"Some stock of Piyush and Jeevan Jal is available at KUSOM, Gwarko, Lalitpur. If you are distributing them, please contact us before 3:30 PM.","[Distribution Area, ]",True
1030,1141,Sanghchowk. Sindhupalchowk Mission,We are sending reliefs to those effected in this area. We will focus on Food first.,"[Distribution Area, ]",True


In [64]:
# False positives: 14. 13% 

# data[['#', 'INCIDENT TITLE', 'DESCRIPTION']][data['INCIDENT TITLE'] == 'Help needed at Kalthari in Sindhupalchok']

In [65]:
# Use the dot-split method to create a list out of the incident title text (instead of having it just be a string)
data['incident_tag'] = data['INCIDENT TITLE'].apply(lambda x:x.split(", "))
data

Unnamed: 0,#,INCIDENT TITLE,INCIDENT DATE,LOCATION,DESCRIPTION,CATEGORY,LATITUDE,LONGITUDE,Phone Number,Most Affected District,...,URGENT,ACTION TAKEN,CLOSED,ACTION SUMMARY,COMMENT,COMMENT DATE,distarea_tag,distarea_tag_dummy,incident_tag,incident_tag_d
0,2488,20 tents was sent to Kokali Okhaldhunga,7/22/2015 16:03,"Kokali, Okhaldhunga","May 27, 20 tents was sent to Kokali Okhaldhunga","Distribution Area, Other,",27.333000,86.417000,,Okhaldhunga,...,NO,YES,YES,,,,"[Distribution Area, Other, ]",True,[20 tents was sent to Kokali Okhaldhunga],False
1,2487,Relief distributed,7/22/2015 15:58,"Bishal Nagar Marg, Handigaon, Kathmandu, Bagmati, Central Region, Nepal, 44600","May 27, 10 kgs of rice, 1 sacks of daal and clothes was sent to a orphanage at Bishalnagar Kathmandu.","Food, Distribution Area, Other,",27.718147,85.334849,,Kathmandu,...,NO,NO,YES,,,,"[Food, Distribution Area, Other, ]",True,[Relief distributed],True
2,2486,Relief distributed,7/22/2015 15:54,"Lamidada, Khotang","May 27, Act4Quake team dispatched tents of Rs 1 lakh 50 thousands to Lamidanda Khotang.","Distribution Area, Shelter, Other,",27.248000,86.672000,,Other,...,NO,YES,YES,,,,"[Distribution Area, Shelter, Other, ]",True,[Relief distributed],True
3,2485,Relief distributed,7/22/2015 15:45,"Boch, Dolakha","May 28, Youth team report back from Boach Dolakha after distributing 4500 kgs of rice, 90 pcs of blankets, 90 tents, 75 pcs of baby food, 500 kg of lentils, 4 cartoons of cooking oil to 147 affected families.","Other,",27.684000,85.988000,,Dolakha,...,NO,YES,YES,,,,"[Other, ]",False,[Relief distributed],True
4,2484,Relief distributed,7/22/2015 15:41,"Bankhu, Kavre","Youth volunteers report back from Bankhu Kavre after distributing tents, rice, blankets, daal, kerau and bitten rice to 76 households","Distribution Area, Other,",27.364000,85.658000,,Kavrepalanchok,...,NO,YES,YES,,,,"[Distribution Area, Other, ]",True,[Relief distributed],True
5,2482,Relief distributed,7/22/2015 15:23,"Dhulikhel, Bagmati, Central Region, Nepal","May 28, Act4Quake team was reported back from Dhulikhel Municipality office for supporting 40 tents, 600 kg of rice, 72 pcs of soap and 16 pcs of blankets for relief operations.","Distribution Area, Other,",27.616667,85.550000,,Kavrepalanchok,...,NO,YES,YES,,,,"[Distribution Area, Other, ]",True,[Relief distributed],True
6,2481,Relief distributed,7/22/2015 15:21,"Gorkha, Gandaki, Western Region, Nepal","May 28, #Act4Quake youth team returned back from Gorkha after distributing 1500 kgs of rice, 250 kgs of daal, besar, sanitary pads and beaten rice to 125 households.","Distribution Area, Other,",28.000000,84.633333,,Gorkha,...,NO,YES,YES,,,,"[Distribution Area, Other, ]",True,[Relief distributed],True
7,2480,Relief distributed,7/22/2015 14:43,"Sundrawati, Dolakha",Act4Quake team dispatched 100 tents and beaten rice to sundrawati VDC of Dolakha district on May 29.,"Distribution Area, Other,",27.714000,86.074000,,Dolakha,...,NO,YES,YES,,,,"[Distribution Area, Other, ]",True,[Relief distributed],True
8,2479,Relief distibuted by AYON,7/22/2015 14:40,"Jorpati, Bagmati, Central Region, Nepal, 44600","2 pieces Tents, 4 sacks of Rice, 1 sack Daal and 1 cartoon Cooking Oil were distributed to a visually impaired community of Jorpati, Kathmandu.","Distribution Area, Other,",27.720000,85.380000,,Kathmandu,...,NO,YES,YES,"2 pieces Tents, 4 sacks of Rice, 1 sack Daal and 1 cartoon Cooking Oil were distributed to a visually impaired community of Jorpati, Kathmandu.",,,"[Distribution Area, Other, ]",True,[Relief distibuted by AYON],False
9,2478,Relief distributed,7/22/2015 14:35,"Bethan, Ramechhap",200 pieces tents and 10 boxes of sanitary pads were dipatched to Bethan VDC of Ramechhap district.,"Distribution Area, Other,",27.497000,85.829000,,Ramechhap,...,NO,YES,YES,,,,"[Distribution Area, Other, ]",True,[Relief distributed],True


In [63]:
# If it does, then a "True" is returned into the distarea_tag_dummy column 
data['incident_tag_d'] = data.incident_tag.apply(lambda x: 'Relief distributed' in x)
data.head()

Unnamed: 0,#,INCIDENT TITLE,INCIDENT DATE,LOCATION,DESCRIPTION,CATEGORY,LATITUDE,LONGITUDE,Phone Number,Most Affected District,...,URGENT,ACTION TAKEN,CLOSED,ACTION SUMMARY,COMMENT,COMMENT DATE,distarea_tag,distarea_tag_dummy,incident_tag,incident_tag_d
0,2488,20 tents was sent to Kokali Okhaldhunga,7/22/2015 16:03,"Kokali, Okhaldhunga","May 27, 20 tents was sent to Kokali Okhaldhunga","Distribution Area, Other,",27.333,86.417,,Okhaldhunga,...,NO,YES,YES,,,,"[Distribution Area, Other, ]",True,[20 tents was sent to Kokali Okhaldhunga],False
1,2487,Relief distributed,7/22/2015 15:58,"Bishal Nagar Marg, Handigaon, Kathmandu, Bagmati, Central Region, Nepal, 44600","May 27, 10 kgs of rice, 1 sacks of daal and clothes was sent to a orphanage at Bishalnagar Kathmandu.","Food, Distribution Area, Other,",27.718147,85.334849,,Kathmandu,...,NO,NO,YES,,,,"[Food, Distribution Area, Other, ]",True,[Relief distributed],True
2,2486,Relief distributed,7/22/2015 15:54,"Lamidada, Khotang","May 27, Act4Quake team dispatched tents of Rs 1 lakh 50 thousands to Lamidanda Khotang.","Distribution Area, Shelter, Other,",27.248,86.672,,Other,...,NO,YES,YES,,,,"[Distribution Area, Shelter, Other, ]",True,[Relief distributed],True
3,2485,Relief distributed,7/22/2015 15:45,"Boch, Dolakha","May 28, Youth team report back from Boach Dolakha after distributing 4500 kgs of rice, 90 pcs of blankets, 90 tents, 75 pcs of baby food, 500 kg of lentils, 4 cartoons of cooking oil to 147 affected families.","Other,",27.684,85.988,,Dolakha,...,NO,YES,YES,,,,"[Other, ]",False,[Relief distributed],True
4,2484,Relief distributed,7/22/2015 15:41,"Bankhu, Kavre","Youth volunteers report back from Bankhu Kavre after distributing tents, rice, blankets, daal, kerau and bitten rice to 76 households","Distribution Area, Other,",27.364,85.658,,Kavrepalanchok,...,NO,YES,YES,,,,"[Distribution Area, Other, ]",True,[Relief distributed],True


In [14]:
# Only keep the cases where Distribution Area was not included in the category tags. 
# The goal is to eliminate cases that were relief distribution reports, not new reports. 
data2 = data[data.distarea_tag_dummy==False]
data2

Unnamed: 0,#,INCIDENT TITLE,INCIDENT DATE,LOCATION,DESCRIPTION,CATEGORY,LATITUDE,LONGITUDE,Phone Number,Most Affected District,...,VERIFIED,ACTIONABLE,URGENT,ACTION TAKEN,CLOSED,ACTION SUMMARY,COMMENT,COMMENT DATE,distarea_tag,distarea_tag_dummy
3,2485,Relief distributed,7/22/2015 15:45,"Boch, Dolakha","May 28, Youth team report back from Boach Dol...","Other,",27.684000,85.988000,,Dolakha,...,NO,Unactionable,NO,YES,YES,,,,"[Other, ]",False
10,2465,"Summary of Ri VDC, Dhading - NAPA",7/12/2015 14:06,"Ri, Dhading","Ri VDC, Dhading rice and roofing tin response\...","Medical Team, Food Security or Water, Sanitati...",28.115580,84.970000,,Dhading,...,NO,Unactionable,NO,NO,YES,,"Rod / Jan, The Assessment cluster has reported...",7/22/2015 18:40,"[Medical Team, Food Security or Water, Sanitat...",False
12,2456,"Relief distributed in Palungtar, Nakhor 10 Wes...",7/7/2015 16:59,"Nakhor, Palungtar, Gorkha",relief supplies delivered on May 16 2015 By Gr...,"Other,",28.048690,84.484140,9846291330,Gorkha,...,NO,Unactionable,NO,YES,NO,"Food, Tarps, and blanket distributed to 100 pe...",,,"[Other, ]",False
14,2453,"Falping Dadha, Sindhupalchowk - Teachers, Toil...",7/6/2015 13:14,"Fulpingdandagau, Sindhupulchok","Ward 5 and 6 in Falping Dadha, Sindhupalchowk ...","Medical Facility, Food, Water Sanitation and H...",27.759920,85.792440,9841479724,Sindhupalchok,...,NO,Actionable,NO,NO,NO,"Teachers needed for 20 students in two wards, ...","Received from E-mail list:\n\nDear All,\n\nWe ...",8/5/2015 11:44,"[Medical Facility, Food, Water Sanitation and ...",False
15,2447,Dhading Jeewanpur Ward 5 - Waterproof Shelter ...,7/3/2015 14:48,"Jeewanpur - 5, Dhading",98 dwellings in Dhading Jeewanpur Ward 5 have ...,"Conflicting Information, Shelter, Zinc Sheets,",27.762000,85.196000,9841657103,Dhading,...,YES,Actionable,NO,YES,YES,&quot;I just got word that the CGI arrived yes...,Received following from Nicky Harris (Hope For...,7/5/2015 21:38,"[Conflicting Information, Shelter, Zinc Sheets, ]",False
16,2446,"Mahadevisthan, Kavrepalanchok - Tents and shel...",7/3/2015 13:52,"Mahadevsthan Mandan, Bagmati, Central Region, ...",Follow up report from http://quakemap.org/repo...,"Shelter, Zinc Sheets,",27.719848,85.621888,9851162716,Kavrepalanchok,...,YES,Actionable,NO,NO,YES,Tents and shelter needed for up to 400 househo...,Closed based on comment in report 2174: &quot;...,7/23/2015 9:50,"[Shelter, Zinc Sheets, ]",False
17,2445,"Kulpakot Jhaukhel, Bhaktapur - Cash needed for...",7/3/2015 13:34,"Jhaukhel, Bagmati, Central Region, Nepal",Follow up report from #2134 (building support ...,"Cash,",27.690000,85.430000,9841520964,Bhaktapur,...,YES,Actionable,NO,NO,NO,Funds requested for reconstruction,,,"[Cash, ]",False
18,2444,Lalitpur - Malta - ward 1 - Food needed,7/3/2015 13:23,"Malta1, Lalitpur",Follow up report crated from #1337 (TRIP REPOR...,"Food,",27.489000,85.266000,9813956812,Lalitpur,...,YES,Actionable,NO,NO,YES,Food needed,,,"[Food, ]",False
19,2439,Urgent need of health post materials,7/2/2015 11:47,"Thulo Dhading, Bagmati, Central Region, Nepal",We are in need of some health related material...,"Medical Assiatance, Shelter,",27.650000,85.900000,9841664618,Sindhupalchok,...,NO,Actionable,NO,NO,NO,,Subscribed Global Medic to follow this report,7/20/2015 11:41,"[Medical Assiatance, Shelter, ]",False
20,2435,Tsum Valley - Zinc roofs needed for 630 families,7/1/2015 11:57,"Chhokangparo, Gandaki, Western Region, Nepal",Follow up report generated from #626 (&quot;Ts...,"Tasking: In Progress, Zinc Sheets,",28.580000,85.120000,-5188,Gorkha,...,NO,Actionable,NO,NO,NO,,,,"[Tasking: In Progress, Zinc Sheets, ]",False


In [15]:
# Check 2: How many remaining cases of "relief distributed" reports do we have? 
# Take another random sample 
data2.sample(n=50, replace=False)

Unnamed: 0,#,INCIDENT TITLE,INCIDENT DATE,LOCATION,DESCRIPTION,CATEGORY,LATITUDE,LONGITUDE,Phone Number,Most Affected District,...,VERIFIED,ACTIONABLE,URGENT,ACTION TAKEN,CLOSED,ACTION SUMMARY,COMMENT,COMMENT DATE,distarea_tag,distarea_tag_dummy
316,1999,CYF Collective Base Camp,5/15/2015 13:54,"Kavrepalanchok, Bagmati, Central Region, Nepal",Support Distribution \nKavrepalanchwok Naya Ga...,"Shelter Organization, Other,",27.525942,85.56121,9851000214.0,Kavrepalanchok,...,NO,Unactionable,NO,NO,NO,,,,"[Shelter Organization, Other, ]",False
1081,1085,"Food and shelter needed in Lhi village, Gorkha",5/2/2015 9:15,"Lhi, Lho, Gandaki, Western Region, Nepal","Phone call this AM from CANepal health worker,...","Food, Water Sanitation and Hygiene, Shelter,",28.560944,84.740371,9851068106.0,Gorkha,...,YES,Actionable,YES,NO,NO,"HOPE ALLIANCE gave noodles,biscuits, oils, tea...",Talked to Mr. Dorje. HOPE ALLIANCE gave noodle...,6/17/2015 11:55,"[Food, Water Sanitation and Hygiene, Shelter, ]",False
1103,1061,Still missing belgian Fran�ois Urbany,5/2/2015 2:51,"Langtang, Bagmati, Central Region, Nepal","Still missing Fran�ois Urbany, belgian, male, ...","Missing Person,",28.216667,85.583333,32473670112.0,Rasuwa,...,YES,Actionable,YES,NO,YES,"The receiver responded that , &quot; he will b...","The receiver responded that , &quot; he will b...",5/26/2015 14:49,"[Missing Person, ]",False
147,2265,CWIN Earthquake Relief Work in Kavrepalanchok,5/31/2015 13:05,"Kavrepalanchok, Bagmati, Central Region, Nepal","Till May 31st, CWIN Nepal distributed followin...","Food Security or Water, Sanitation and Hygiene...",27.525942,85.56121,97714278064.0,Kavrepalanchok,...,YES,Unactionable,NO,NO,NO,,,,"[Food Security or Water, Sanitation and Hygien...",False
1394,717,Building Collapsed,4/30/2015 18:00,mobile app,an old residency has collapsed where 5-6 famil...,"Building Collapsed,",27.730077,85.326386,,,...,NO,Unactionable,NO,NO,NO,,,,"[Building Collapsed, ]",False
1554,501,URGENT HELP needed,4/29/2015 21:59,"Tinmaaney Bhanjyang, Taaklung VDC, Ward no. 8,...","Food + water, Medical, Tents, Rescue","People Trapped,",27.982363,84.641606,9851039409.0,Gorkha,...,YES,Actionable,YES,NO,NO,,According to Ganesh ji as per him they have u...,6/17/2015 12:35,"[People Trapped, ]",False
1354,762,Urgent help in Nuwakot,4/30/2015 22:46,"Bat?r B?z?r, Nuwakot, Bagmati, Central Develop...",Urgent help needed in bidur muncipality ward ...,"Camp, Zinc Sheets,",27.888386,85.138969,,Nuwakot,...,YES,Actionable,YES,NO,NO,,"Talked to bina dangol ji, they have need to zi...",6/18/2015 11:16,"[Camp, Zinc Sheets, ]",False
262,2052,Food Supplies Needed for 40 Families,5/21/2015 9:50,"Dhola, Bagmati, Central Region, Nepal, 45100","40 Families in my village, Pursu in Dhading lo...","Food,",27.92,84.83,9849454553.0,Dhading,...,YES,Actionable,YES,NO,NO,Routed to Medhabi Ban through email after talk...,New Report Created: #2375\n\nNew Link: http://...,6/17/2015 12:43,"[Food, ]",False
1805,185,Shelter around jawalakhel,4/28/2015 18:07,"St. Mary's School, Aadarsha Tole, Arun Thapa M...",Shelter around jawalakhel,"Shelter Area,",27.675232,85.311162,,,...,NO,Unactionable,NO,NO,NO,,No contact information provided.,5/1/2015 11:45,"[Shelter Area, ]",False
1343,772,Dhading - urgent help needed,5/1/2015 0:22,"Dhading, Bagmati, Central Development Region, ...","Dhading, wads no 5 has received no help as yet...","Medical Assiatance,",27.99831,84.903112,9809149212.0,Dhading,...,YES,Actionable,YES,NO,NO,,phone is not reachable,6/22/2015 13:29,"[Medical Assiatance, ]",False


In [16]:
# Cases not caught:
# 2265 - CWIN Earthquake, 1813, 1911, 
# Trip reports: most likely these just contain status reports that could be a mix. 

In [17]:
# How many cases not caught this time?
float(3)/50

# Down 40%.

0.06

In [None]:
data2.

# Digging into "relief" & "distributed" titles

In [10]:
# # This approach is too much for initial stages - what we want to do is actually understand
# # what markers of a "relief distributed" report look like. 

# # We want to remove, or tag the "Relief distributed" titles.
# from sklearn.feature_extraction.text import CountVectorizer

# train_simple = ['Relief distributed',
#                'Relief distibuted',
#                'Relief distributed in']

In [11]:
# vect = CountVectorizer(decode_error = 'ignore')
# vect.fit(train_simple)
# vect.get_feature_names()

[u'distibuted', u'distributed', u'in', u'relief']

In [13]:
# transform training data into a 'document-term matrix'
# train_simple_dtm = vect.transform(train_simple)
# train_simple_dtm.toarray()

array([[0, 1, 0, 1],
       [1, 0, 0, 1],
       [0, 1, 1, 1]], dtype=int64)

In [14]:
# examine the vocabulary and document-term matrix together
# pd.DataFrame(train_simple_dtm.toarray(), columns=vect.get_feature_names())

Unnamed: 0,distibuted,distributed,in,relief
0,0,1,0,1
1,1,0,0,1
2,0,1,1,1


In [15]:
# transform testing data into a document-term matrix (using existing vocabulary)
# test_simple = ["relief distributed"]
# test_simple_dtm = vect.transform(test_simple)
# test_simple_dtm.toarray()
# pd.DataFrame(test_simple_dtm.toarray(), columns=vect.get_feature_names())

Unnamed: 0,distibuted,distributed,in,relief
0,0,1,0,1


In [1]:
# instantiate the vectorizer ( use variable name as vect)
# vect = CountVectorizer(decode_error = 'ignore')
# vect.fit(data['INCIDENT TITLE'])
# vect.get_feature_names()

In [2]:
# train_dtm = vect.transform(data['INCIDENT TITLE'])
# print train_dtm

In [3]:
# train_features = vect.get_feature_names()
# len(train_features)

In [4]:
# Explore the features of our dataset to figure out where the "reliefs" show up.
# Range[100:178] - all numbers
# Range[709:718] - pulls out all variations of dispatch + distribution
# train_features[-541:-540] - relief
# train_features[:219]

In [5]:
# train_features[-200:]

In [6]:
# convert train_dtm to a regular array
# train_arr = train_dtm.toarray()
# train_arr

In [7]:
# Create a DataFrame of tokens with their counts.
# train_token_counts = pd.DataFrame({'token':train_features, 'count':np.sum(train_arr, axis=0)})
# train_token_counts.iloc[-541]

In [8]:
# train_token_counts.iloc[709:718]

# Explore splitting out reports based on "Category" column

In [9]:
# data.CATEGORY

In [10]:
# data_cat = data.CATEGORY.apply(lambda x: x.split(",")[0])

In [11]:
# data_cat.value_counts()

In [12]:
# data_cat.describe()