Cell (1): # An IPython Notebook to analyze the Gaza-Israel 2012 crisis

The Guardian is tracking and mapping live ([link](http://www.guardian.co.uk/news/datablog/interactive/2012/nov/19/gaza-israel-verified-incidents-mapped)) the recent incidents in Gaza and Israel. As part of their data-journalism spirit, they are sharing the data as a Google Fusion Table available for access.

This notebook is an attempt to show, on the one hand, how the toolkit from the Python stack can be used for a real world data hack and, on the other, to offer deeper analysis beyond mapping of the events, both exploiting the spatial as well as the temporal dimension of the data.

+ The source document (`.ipynb` file) is stored on Github as a gist [here](https://gist.github.com/4121857), which means you can fork it and use it as a starting point for your own data-hack.
+ A viewable version is available [here](http://nbviewer.ipython.org/4121857/), via the IPython Notebook Viewer.

## Collaborate on the notebook!!!

In its initial version (Nov. 20th), the notebook only contains code to stream the data from the Google Fusion Table into a [`pandas`](http://pandas.pydata.org) DataFrame (which means you get the data ready to hack!). Step in and collaborate in making it a good example of how Python can help analyze real world data. Add a new view, quick visualization, summary statistic or fancy model that helps understand the data better!

To contribute, just fork the gist as you would with any git repository.

*Happy hacking!*
Cell (2): The following cell pulls the data using the API. In the meantime, Google has changed its terms and ways to access it, so this might not work.
Cell (4): If you cannot pull the data using the API, an easy alternative is to export the table to a `csv` file manually and read it separately:
Cell (7): Very basic descriptive analysis
Cell (8): + Volume of incidents by day
Cell (10): + Location of events coloured by day

In [None]:
#!/usr/bin/env python
import matplotlib
matplotlib.use("Agg")
import sys
import simplejson
import matplotlib.pyplot as plt
import datetime
import urllib2, urllib
import pandas as pd
import matplotlib.cm as cm
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
# Trick from http://stackoverflow.com/questions/7800213/can-i-use-pythons-csv-reader-with-google-fusion-tables
api =str(sys.argv[1])
request =str(sys.argv[2])
query =str(sys.argv[3])
url = "%s?%s" % (request_url, urllib.urlencode({"sql": query, "key": api_key}))
serv_req = urllib2.Request(url=url)
serv_resp = urllib2.urlopen(serv_req)
table = serv_resp.read()
print "\nLast pull of data from the Google FusionTable: ", datetime.datetime.now()
csv = simplejson.loads(table)
del csv["kind"]
csv["data"] = csv["rows"]
del csv["rows"]
db = pd.read_json(simplejson.dumps(csv), orient="split")
db.to_csv("data.csv", header=True, index=False, encoding="utf-8")

In [None]:
#!/usr/bin/env python
import matplotlib
matplotlib.use("Agg")
import sys
import simplejson
import matplotlib.pyplot as plt
import datetime
import urllib2, urllib
import pandas as pd
import matplotlib.cm as cm
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
# Load the locally cached table.
db = pd.read_csv("data.csv")

# Write `db` out as a text dump at the path given by the first CLI argument:
# the first line is str(type(db)), the remainder is str(db). Any error raised
# while writing is ignored, leaving whatever had been written so far.
dump_path = sys.argv[1]
with open(dump_path, "w") as dump_file:
    try:
        dump_file.write(str(type(db)) + '\n')
        dump_file.write(str(db))
    except:
        pass

In [None]:
#!/usr/bin/env python
import matplotlib
matplotlib.use("Agg")
import sys
import simplejson
import matplotlib.pyplot as plt
import datetime
import urllib2, urllib
import pandas as pd
import matplotlib.cm as cm
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
def parse_loc(loc, ret_lon=True):
    """Extract one coordinate from a ``"<first>,<second>"`` location string.

    The text is split on its comma and both halves are converted to
    ``float``. The function is purely positional: the first number is
    returned as the "lon" value and the second as the "lat" value.

    Args:
        loc: Text holding exactly two comma-separated numbers, for example
            ``"34.5, 31.2"``. Spaces around each number are ignored.
        ret_lon: When true (the default) return the first number, otherwise
            return the second.

    Returns:
        The requested number as a ``float``, or ``None`` when ``loc`` cannot
        be split (e.g. ``None`` or a NaN float) or does not contain exactly
        two parseable numbers.
    """
    try:
        first, second = loc.split(",")
        lon, lat = float(first.strip(' ')), float(second.strip(' '))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: loc has no .split (None, NaN, numbers).
        # TypeError: loc splits with a different separator type (e.g. bytes).
        # ValueError: wrong number of fields, or a field is not a number.
        return None
    return lon if ret_lon else lat
# --- Restore `db` from the text dump named by the first CLI argument -------
# The dump's first line is str(type(value)) and the rest is str(value). The
# fixed offsets 7:10 / 7:11 / 7:12 pick the type name out of a header shaped
# like "<type 'int'>" (seven characters precede the name).
with open(sys.argv[1],"r") as r:
    arr = r.read()
payload = arr[arr.find('>')+1:]  # everything after the header's closing '>'
if arr[7:10] == 'int':
    db = int(payload)
elif arr[7:10] == 'str':
    db = payload
elif arr[7:11] == 'bool':
    # Spaces are stripped out before looking at the last four characters.
    db = arr.replace(' ','')[-4:] == 'True'
elif arr[7:12] == 'float':
    db = float(payload)
else:
    # SECURITY: eval() executes whatever the dump file contains; only run
    # this script on dumps produced by a trusted stage.
    # NOTE(review): a DataFrame header ("<class 'pandas...") ends up here and
    # a printed table is unlikely to be a valid Python expression -- confirm
    # how `db` is really meant to reach this script.
    db = eval(payload)

# --- Derive coordinate columns and normalise the dates ---------------------
db["lon"] = db["Location (approximate)"].apply(parse_loc)
db["lat"] = db["Location (approximate)"].apply(lambda loc: parse_loc(loc, ret_lon=False))
db["Date"] = db["Date"].apply(pd.to_datetime)
db.to_csv("data-modified.csv")

# --- Dump `x` to the path named by the second CLI argument -----------------
# NOTE(review): `x` is never assigned in this script, so the lookup below
# raises NameError, which is swallowed, and the file is left empty --
# confirm which value the consuming stage expects here.
with open(sys.argv[2],"w") as w392075:
    try:
        w392075.write(str(type(x))+'\n')
        w392075.write(str(x))
    except Exception:
        # Best-effort dump: ordinary errors are ignored, but Ctrl-C and
        # interpreter exit are no longer suppressed.
        pass

In [None]:
#!/usr/bin/env python
import matplotlib
matplotlib.use("Agg")
import sys
import simplejson
import matplotlib.pyplot as plt
import datetime
import urllib2, urllib
import pandas as pd
import matplotlib.cm as cm
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
# Rebuild `x` from the text dump named by the second CLI argument. The type
# name is expected at character offset 7 of the header; everything after the
# first '>' is the textual value.
with open(sys.argv[2], "r") as dump_file:
    raw = dump_file.read()
value_text = raw[raw.find('>') + 1:]
if raw.startswith('int', 7):
    x = int(value_text)
elif raw.startswith('str', 7):
    x = value_text
elif raw.startswith('bool', 7):
    x = raw.replace(' ', '').endswith('True')
elif raw.startswith('float', 7):
    x = float(value_text)
else:
    # Any other header: the text is evaluated as a Python expression.
    x = eval(value_text)

# Reload the enriched table and parse its dates.
db = pd.read_csv("data-modified.csv")
db["Date"] = db["Date"].apply(pd.to_datetime)

# `t` is evaluated from the file named by the first CLI argument, re-indexed
# by its own values, and then counted per `.day` of each index label.
with open(sys.argv[1], "r") as series_file:
    t = eval(series_file.read())
t = t.reindex(t)
by_day = t.groupby(lambda stamp: stamp.day).size()
by_day.plot(kind="bar")
plt.title("Number of events by day")
plt.show()

# Dump `x` to the path named by the third CLI argument; write errors are
# silently ignored.
with open(sys.argv[3], "w") as out_file:
    try:
        out_file.write(str(type(x)) + '\n')
        out_file.write(str(x))
    except:
        pass

In [None]:
#!/usr/bin/env python
import matplotlib
matplotlib.use("Agg")
import sys
import simplejson
import matplotlib.pyplot as plt
import datetime
import urllib2, urllib
import pandas as pd
import matplotlib.cm as cm
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
# --- Restore `x` from the text dump named by the first CLI argument --------
# The dump's first line is str(type(value)) and the rest is str(value); the
# offsets below index the type name inside a header like "<type 'int'>".
# NOTE(review): `x` is rebound to db["lon"] further down before it is read.
with open(sys.argv[1],"r") as r:
    arr = r.read()
payload = arr[arr.find('>')+1:]  # everything after the header's closing '>'
if arr[7:10] == 'int':
    x = int(payload)
elif arr[7:10] == 'str':
    x = payload
elif arr[7:11] == 'bool':
    x = arr.replace(' ','')[-4:] == 'True'
elif arr[7:12] == 'float':
    x = float(payload)
else:
    # SECURITY: eval() executes whatever the dump file contains; only run
    # this script on dumps produced by a trusted stage.
    x = eval(payload)

db = pd.read_csv("data-modified.csv")
db["Date"] = db["Date"].apply(pd.to_datetime)

# --- Plot: every event as a black dot, then one coloured layer per day -----
f = plt.figure(figsize=(10, 6))
ax = f.add_subplot(111)
x, y = db["lon"], db["lat"]
s = plt.scatter(x, y, marker=".", color="k")
for d, day in db.set_index("Date").groupby(lambda stamp: stamp.day):
    x, y = day["lon"], day["lat"]
    c = cm.Set1(d/30.)  # day of month scaled into the colormap's 0-1 range
    s = plt.scatter(x, y, marker="^", color=c, label=str(d), s=20)
ax.get_yaxis().set_visible(False)
ax.get_xaxis().set_visible(False)
plt.legend(loc=2)
plt.title("Spatial distribution of events by day")
# Newer matplotlib releases dropped set_axis_bgcolor in favour of
# set_facecolor; use whichever this installation provides.
if hasattr(ax, "set_facecolor"):
    ax.set_facecolor("0.2")
else:
    ax.set_axis_bgcolor("0.2")

# --- Dump x, y, day and c to the paths in CLI arguments 2-5 ----------------
# After the loop these names hold the values of the last group. If the loop
# never ran, `day` and `c` are unbound; the resulting NameError is swallowed
# and those files stay empty. Ordinary write errors are ignored as well, but
# Ctrl-C and interpreter exit are no longer suppressed.
with open(sys.argv[2],"w") as w392075:
    try:
        w392075.write(str(type(x))+'\n')
        w392075.write(str(x))
    except Exception:
        pass
with open(sys.argv[3],"w") as w392075:
    try:
        w392075.write(str(type(y))+'\n')
        w392075.write(str(y))
    except Exception:
        pass
with open(sys.argv[4],"w") as w392075:
    try:
        w392075.write(str(type(day))+'\n')
        w392075.write(str(day))
    except Exception:
        pass
with open(sys.argv[5],"w") as w392075:
    try:
        w392075.write(str(type(c))+'\n')
        w392075.write(str(c))
    except Exception:
        pass

# --- Save the figure (best effort) -----------------------------------------
# The original line read `savefig(sys.argv[])`, a syntax error that stopped
# the whole script from compiling; `savefig` was also never imported.
# NOTE(review): index 6 is inferred from the stray "6" in the original
# `except:pass6` and from arguments 1-5 being used above -- confirm.
try:
    f.savefig(sys.argv[6])
except Exception:
    pass

In [None]:
#!/usr/bin/env python
import matplotlib
matplotlib.use("Agg")
import sys
import simplejson
import matplotlib.pyplot as plt
import datetime
import urllib2, urllib
import pandas as pd
import matplotlib.cm as cm
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
# You'll need cartopy for a pretty map

In [None]:
#!/usr/bin/env python
import matplotlib
matplotlib.use("Agg")
import sys
import simplejson
import matplotlib.pyplot as plt
import datetime
import urllib2, urllib
import pandas as pd
import matplotlib.cm as cm
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
def _load_dumped_value(path):
    """Rebuild a value from a text dump: a type header line, then str(value).

    The type name is read at fixed offsets inside a header shaped like
    "<type 'int'>"; everything after the first '>' is the textual value.
    int, str, bool and float are converted directly; any other header falls
    back to evaluating the text as a Python expression.
    """
    with open(path, "r") as dump_file:
        arr = dump_file.read()
    payload = arr[arr.find('>')+1:]
    if arr[7:10] == 'int':
        return int(payload)
    if arr[7:10] == 'str':
        return payload
    if arr[7:11] == 'bool':
        return arr.replace(' ','')[-4:] == 'True'
    if arr[7:12] == 'float':
        return float(payload)
    # SECURITY: eval() executes whatever the dump file contains; only run
    # this script on dumps produced by a trusted stage. Module globals are
    # passed so names resolve as they did when this ran at top level.
    return eval(payload, globals())


# --- Restore c, day, y, x from the dumps named by CLI arguments 4 to 1 -----
# NOTE(review): all four names are rebound below (x, y from `db`; day, c in
# the loop) before anything reads them, so these loads only matter through
# the errors they can raise -- confirm whether they are still needed.
c = _load_dumped_value(sys.argv[4])
day = _load_dumped_value(sys.argv[3])
y = _load_dumped_value(sys.argv[2])
x = _load_dumped_value(sys.argv[1])

db = pd.read_csv("data-modified.csv")
db["Date"] = db["Date"].apply(pd.to_datetime)

# --- Map: OSM tiles as background, one coloured marker layer per day -------
bg = cimgt.OSM()
src = ccrs.PlateCarree()
f = plt.figure(figsize=(20, 30))
ax = plt.axes(projection=bg.crs)
ax.add_image(bg, 9, alpha=0.5)
x, y = db["lon"], db["lat"]
# A first `extent` computed from y.min()/y.max() was immediately overwritten
# in the original and has been dropped; only this value was ever used.
extent = [34, 36, x.min(), x.max()]#Manually tweaked
# NOTE(review): the "lat" column is plotted on the horizontal axis and "lon"
# on the vertical one, and `extent` uses the "lon" column for its last two
# entries -- presumably the columns hold the opposite of what their names
# say; confirm against the location format that parse_loc receives.
for d, day in db.set_index("Date").groupby(lambda stamp: stamp.day):
    y, x = day["lon"], day["lat"]
    c = cm.Set1(d/30.)  # day of month scaled into the colormap's 0-1 range
    s = plt.scatter(x, y, marker="^", color=c, label=str(d), s=40, transform=src)
ax.set_extent(extent, crs=src)
plt.legend(loc=2)
plt.title("Spatial distribution of events by day")

# --- Save the figure (best effort), then show it ---------------------------
# The original line read `savefig(sys.argv[])`, a syntax error that stopped
# the whole script from compiling; `savefig` was also never imported. The
# figure object is saved before plt.show() so the file does not depend on
# what the backend does with the figure once it has been shown.
# NOTE(review): index 5 is inferred from the stray "5" in the original
# `except:pass5` and from arguments 1-4 being used above -- confirm.
try:
    f.savefig(sys.argv[5])
except Exception:
    pass
plt.show()