In [3]:
import pandas as pd
import numpy as np
import matplotlib as plt
import warnings
import statistics
from google.cloud import bigquery as bq
%matplotlib inline

# Gain access to the account where the data is located
client = bq.Client.from_service_account_json("C:/Users/Andrew/Documents/Data Science Masters/Applied Statistics 6372/Projects/Project 1/data/ml-jth-d220ef33563e.json")
# Suppress warnings because they're annoying.
warnings.simplefilter(action="ignore")

## Read in the data

In [4]:
bombing = pd.read_csv("WW2_Bombing_Data.csv")
stations = pd.read_csv("WeatherStationsGermany.csv")

#### Merging data sets
We need to determine which weather stations were in the vicinity of a given bombing mission to extract what the weather was like at that station.  To do this, we'll consider that latitude and longitude at which the attack took place, and then create a region around that location representing what the weather was like there.

At first glance, it makes sense to consider the range that the aircraft can see to the horizon.  The formula for distance in miles is 1.22 times the square root of the height (in feet) of the aircraft.

\begin{equation}
Distance_{Horizon}(Miles) = 1.22\times\sqrt{Height(Feet)}
\end{equation}

WW2 bombing aircraft generally flew at an altitude of 10,000 feet when delivering their payload.  Using 10,000 feet for height, we see that aircraft can see approximately 122 miles to the horizon in all directions.

Each degree of lattitude is approximately 69 miles.  Therefore it's safe to say that the weather will likely be unchanged in a 2 degree latitude radius (~140 miles).

In [5]:
# Define a function that will list all station IDs within a 1 degree wide box centered around the attack
def FindStations(Latitude, Longitude):
    nearby_stations = stations[(Latitude+2 >= stations["Latitude"]) & (Latitude-2 <= stations["Latitude"]) &
                              (Longitude+2 >= stations["Longitude"]) & (Longitude-2 <= stations["Longitude"])]
    return(list(nearby_stations["StationID"]))

In [86]:
bombing_stations = bombing[bombing["DefCountry"] == "Germany"]  # Only look at bombings in Germany
bombing_stations.drop(["Details"], axis=1, inplace = True)  # We don't need the Details column, so drop it.
bombing_stations["StationIDs"] = ""  # This is just a place holder so the next line will work
# Iterate through each row, and pass the lat and long into the FindStations function
for index, row in bombing_stations.iterrows():
    bombing_stations.at[index, "StationIDs"]= FindStations(float(row["Latitude"]), float(row["Longitude"]))

This produces a new dataframe that looks like the one presented below.  The StationIDs are presented as a lit of IDs of the weather stations within range of the given attack.

In [89]:
bombing_stations.head()

Unnamed: 0.1,Unnamed: 0,Success,Month,Day,Year,Country,DefCity,DefCountry,Latitude,Longitude,StationIDs
3,3,0,9,3,1939,United Kingdom,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34..."
4,4,0,9,3,1939,United Kingdom,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34..."
5,5,0,9,4,1939,United Kingdom,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34..."
6,6,0,9,4,1939,United Kingdom,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34..."
11,11,1,12,18,1939,Germany,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34..."


## Query the database
Now that we can link the weather stations that were in the vicinity of a bombing mission, we can use the StationID to extract the weather information reported by each of those stations.  Since most (if not all) bombing missions were in range of several weather stations, it makes sense to use the average value of each parameter measured by the stations as our metric for evaluating the weather on that day for that specific attack.

To do that, we'll need to gather the attributes from the weather database that corresponds to the StationIDs collected on a given day of an attack.

In order to query the database, we'll need pass in a string that contains the SQL statement that we want to execute.  Then, convert degrees to farenheight and return the resulting dataframe.  To complicate matters, some values have -999.  These will have to be omitted so the result isn't polluted with extreme values when averaged.

In [8]:
# Define the query and load the data.  MaxWindGust and MeanWindSpeed values are all -999.0.  Remove from query
def GetWeather(query):
    query_job = client.query(query)
    weather_query = query_job.to_dataframe()

# Convert Temps to Farenheit
    try:
        weather_query['MaxTemp'] = weather_query['MaxTemp'].apply(lambda x: float((x * 9/5) + 32))
        weather_query['MinTemp'] = weather_query['MinTemp'].apply(lambda x: float((x * 9/5) + 32))
        weather_query['MeanTemp'] = weather_query['MeanTemp'].apply(lambda x: float((x * 9/5) + 32))
        weather_query['MinAirTemp'] = weather_query['MinAirTemp'].apply(lambda x: float((x * 9/5) + 32))
        weather_query.loc[weather_query['MaxTemp']<=-999.0, ['MaxTemp']]= -999.0
        weather_query.loc[weather_query['MinTemp']<=-999.0, ['MinTemp']]= -999.0
        weather_query.loc[weather_query['MeanTemp']<=-999.0, ['MeanTemp']]= -999.0
        weather_query.loc[weather_query['MinAirTemp']<=-999.0, ['MinAirTemp']]= -999.0
    except:
        print(query + " was unable to process because no weather stations were in range.")
    
# Make a new dataframe that will just be 1 row long.
    weather= weather_query.iloc[0,:]
# Then, we'll need to filter out anywhere that a value equals -999.
    def BadValues(values):
        if values==-999:
            return False
        else:
            return True
# Average the resulting table, but only where values are not -999.  Take the median of PrecipForm because it's a factor.
# If this fails, it's because there are no values that do not equal -999.
    try:
        weather["MinTemp"]= sum(filter(BadValues, weather_query["MinTemp"])) / len(list(filter(BadValues, weather_query["MinTemp"])))
    except:
        print("This one failed.  MinTemp had no non -999 values")
    try:
        weather["MaxTemp"]= sum(filter(BadValues, weather_query["MaxTemp"])) / len(list(filter(BadValues, weather_query["MaxTemp"])))
    except:
        print("This one failed.  MaxTemp had no non -999 values")
    try:
        weather["MeanTemp"]= sum(filter(BadValues, weather_query["MeanTemp"])) / len(list(filter(BadValues, weather_query["MeanTemp"])))
    except:
        print("This one failed.  MeanTemp had no non -999 values")
    try:
        weather["MinAirTemp"]= sum(filter(BadValues, weather_query["MinAirTemp"])) / len(list(filter(BadValues, weather_query["MinAirTemp"])))
    except:
        print("This one failed.  MinAirTemp had no non -999 values")
    try:
        weather["SunDuration"]= sum(filter(BadValues, weather_query["SunDuration"])) / len(list(filter(BadValues, weather_query["SunDuration"])))
    except:
        print("This one failed.  SunDuration had no non -999 values")
    try:
        weather["MeanCloudCover"]= sum(filter(BadValues, weather_query["MeanCloudCover"])) / len(list(filter(BadValues, weather_query["MeanCloudCover"])))
    except:
        print("This one failed.  MeanCloudCover had no non -999 values")
    try:
        weather["MeanCloudVapor"]= sum(filter(BadValues, weather_query["MeanCloudVapor"])) / len(list(filter(BadValues, weather_query["MeanCloudVapor"])))
    except:
        print("This one failed.  MeanCloudVapor had no non -999 values")
    try:
        weather["MeanRelHumid"]= sum(filter(BadValues, weather_query["MeanRelHumid"])) / len(list(filter(BadValues, weather_query["MeanRelHumid"])))
    except:
        print("This one failed.  MeanRelHumid had no non -999 values")
    try:
        weather["PrecipHeight"]= sum(filter(BadValues, weather_query["PrecipHeight"])) / len(list(filter(BadValues, weather_query["PrecipHeight"])))
    except:    
        print("This one failed.  PrecipHeight had no non -999 values")
    try: 
        weather["PrecipForm"]= statistics.median(filter(BadValues, weather_query["PrecipForm"]))
    except:
        print("This one failed.  PrecipForm had no non -999 values")
    try:
        weather["MeanPressure"]= sum(filter(BadValues, weather_query["MeanPressure"])) / len(list(filter(BadValues, weather_query["MeanPressure"])))
    except:
        print("This one failed.  MeanPressure had no non -999 values")
    try:
        weather["SnowDepth"]= sum(filter(BadValues, weather_query["SnowDepth"])) / len(list(filter(BadValues, weather_query["SnowDepth"])))
    except:
        print("This one failed.  SnowDepth had no non -999 values")
    return(weather)

In [280]:
bombing_stations["MinTemp"]= ""
bombing_stations["MaxTemp"]= ""
bombing_stations["MeanTemp"]= ""
bombing_stations["MinAirTemp"]= ""
bombing_stations["SunDuration"]= ""
bombing_stations["MeanCloudCover"]= ""
bombing_stations["MeanCloudVapor"]= ""
bombing_stations["MeanRelHumid"]= ""
bombing_stations["PrecipHeight"]= ""
bombing_stations["PrecipForm"]= ""
bombing_stations["MeanPressure"]= ""
bombing_stations["SnowDepth"]= ""
#main = """SELECT AVG(MinTemp) MinTemp, AVG(MaxTemp) MaxTemp, AVG(MeanTemp) MeanTemp, AVG(MinAirTemp) MinAirTemp,
#    AVG(SunDuration) SunDuration, AVG(MeanCloudCover) MeanCloudCover, AVG(MeanCloudVapor) MeanCloudVapor,
#    AVG(MeanRelHumid) MeanRelHumid, AVG(PrecipHeight) PrecipHeight, AVG(PrecipForm) PrecipForm,
#    AVG(MeanPressure) MeanPressure, AVG(SnowDepth) SnowDepth
#    FROM `ml-jth.germany.weather`
#    WHERE date = DATE("""
main = """SELECT MinTemp MinTemp, MaxTemp MaxTemp, MeanTemp MeanTemp, MinAirTemp MinAirTemp,
    SunDuration SunDuration, MeanCloudCover MeanCloudCover, MeanCloudVapor MeanCloudVapor,
    MeanRelHumid MeanRelHumid, PrecipHeight PrecipHeight, PrecipForm PrecipForm,
    MeanPressure MeanPressure, SnowDepth SnowDepth
    FROM `ml-jth.germany.weather`
    WHERE date = DATE("""
print("Collecting data...")
i= 0
for index, row in bombing_stations.iterrows():
    date= str(row.Year) + "-" + str(row.Month) + "-" + str(row.Day)
    print(i, index, date)
    query =  main + '"{}"'.format(date) + ") AND StationID IN " + "("+ str(row.StationIDs)[1:-1] + ")"
    weather = GetWeather(query)
    bombing_stations.iloc[i,11] = weather["MinTemp"]
    bombing_stations.iloc[i,12] = weather["MaxTemp"]
    bombing_stations.iloc[i,13] = weather["MeanTemp"]
    bombing_stations.iloc[i,14] = weather["MinAirTemp"]
    bombing_stations.iloc[i,15] = weather["SunDuration"]
    bombing_stations.iloc[i,16] = weather["MeanCloudCover"]
    bombing_stations.iloc[i,17] = weather["MeanCloudVapor"]
    bombing_stations.iloc[i,18] = weather["MeanRelHumid"]
    bombing_stations.iloc[i,19] = weather["PrecipHeight"]
    bombing_stations.iloc[i,20] = weather["PrecipForm"]
    bombing_stations.iloc[i,21] = weather["MeanPressure"]
    bombing_stations.iloc[i,22] = weather["SnowDepth"]
    i += 1
print("Complete")
    

Collecting data...
0 3 1939-9-3
1 4 1939-9-3
2 5 1939-9-4
3 6 1939-9-4
4 11 1939-12-18
5 18 1940-5-15
6 19 1940-5-19
7 24 1940-6-7
8 30 1940-8-25
9 37 1941-1-21
10 39 1941-3-31
11 43 1941-8-8
12 46 1941-9-7
13 47 1941-11-7
14 48 1941-12-7
15 51 1942-3-8
16 52 1942-3-13
17 53 1942-3-25
18 54 1942-3-28
19 55 1942-4-8
20 56 1942-4-17
21 57 1942-4-23
22 58 1942-4-24
23 59 1942-5-30
24 61 1942-6-25
25 70 1942-9-2
26 74 1942-12-22
27 75 1943-1-27
28 76 1943-3-5
29 77 1943-4-13
30 80 1943-5-17
31 81 1943-6-11
32 82 1943-6-13
33 83 1943-6-26
34 84 1943-6-20
35 85 1943-7-19
36 86 1943-7-24
37 91 1943-8-17
38 92 1943-8-18
39 96 1943-10-10
40 97 1943-10-14
41 98 1943-11-1
42 101 1943-11-3
43 102 1943-11-18
44 103 1943-11-22
45 109 1944-2-19
46 110 1944-2-20
47 111 1944-3-6
48 112 1944-3-1
49 122 1944-7-23
50 124 1944-7-26
51 125 1944-7-28
52 126 1944-8-27
53 133 1945-2-3
54 134 1945-2-13
55 135 1945-3-12
56 136 1945-3-14
57 137 1945-2-1
58 138 1945-3-17
59 139 1945-3-18
60 140 1945-3-22
61 143 19

In [126]:
from IPython.display import display

pd.options.display.max_columns = None
pd.options.display.max_rows = None
bombing_stations

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Success,Month,Day,Year,Country,DefCity,DefCountry,Latitude,Longitude,StationIDs,MinTemp,MaxTemp,MeanTemp,MinAirTemp,SunDuration,MeanCloudCover,MeanCloudVapor,MeanRelHumid,PrecipHeight,PrecipForm,MeanPressure,SnowDepth
0,3,3,0,9,3,1939,United Kingdom,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",59.12,78.02,67.868,55.295,7.757143,4.5,17.153333,73.8,1.433333,0.0,1011.783333,0.0
1,4,4,0,9,3,1939,United Kingdom,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",59.12,78.02,67.868,55.295,7.757143,4.5,17.153333,73.8,1.433333,0.0,1011.783333,0.0
2,5,5,0,9,4,1939,United Kingdom,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",58.568,69.836,63.512,56.735,1.957143,6.573333,15.94,79.266667,8.813333,1.0,1010.033333,0.0
3,6,6,0,9,4,1939,United Kingdom,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",58.568,69.836,63.512,56.735,1.957143,6.573333,15.94,79.266667,8.813333,1.0,1010.033333,0.0
4,11,11,1,12,18,1939,Germany,Wilhelmshaven,Germany,53.53234,8.106872,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",16.22,28.424,22.676,13.792308,2.775,2.18,3.52,83.8,0.0,0.0,1021.316667,0.066667
5,18,18,1,5,15,1940,United Kingdom,Ruhr,Germany,51.37315,7.629154,"[3, 44, 78, 91, 93, 98, 150, 161, 172, 186, 19...",45.457368,68.189474,56.783158,42.44,12.266667,2.386842,9.711111,62.611111,0.005263,0.0,984.091304,0.0
6,19,19,1,5,19,1940,France,Berlin,Germany,52.52001,13.40495,"[116, 129, 131, 164, 167, 169, 184, 207, 222, ...",41.09,61.102727,51.431429,38.942857,8.6,4.89,7.861111,61.894737,0.709091,0.0,999.964706,0.0
7,24,24,1,6,7,1940,France,Berlin,Germany,52.52001,13.40495,"[116, 129, 131, 164, 167, 169, 184, 207, 222, ...",52.830909,77.409091,66.609091,51.645714,13.915385,2.214286,13.916667,64.3,0.0,0.0,997.572222,0.0
8,30,30,1,8,25,1940,United Kingdom,Berlin,Germany,52.52001,13.40495,"[116, 129, 131, 164, 167, 169, 184, 207, 222, ...",48.756364,63.581818,57.157143,47.042857,2.978571,7.38,13.461111,85.842105,1.509091,1.0,1004.058824,0.0
9,37,37,1,1,21,1941,Germany,Berlin,Germany,52.52001,13.40495,"[116, 129, 131, 164, 167, 169, 184, 207, 222, ...",30.782353,39.771765,37.092941,28.965714,0.028571,7.7375,6.366667,85.125,1.811765,1.0,974.2,19.1875


The resulting dataframe contains the weather parameters averaged over all of the stations that were in range of each attack.  While the column "StationIDs" represents the stations that were in range of the attack, it does **not** reflect which stations actually had data on that day.

All that is left to do now is convert the dataframe to a .csv and upload it for future use.

In [283]:
# Convert the pandas dataframe to a .csv
bombing_stations.to_csv("Bombing_Weather_Data.csv")

## Obtain additional data
Now that we have the weather data for the bombing missions.  Let's see if we can grab some additional data for the years before the attack.  With this additional data, we may be able to deduce which conditions lead to a successful attack!

There are 32 unique cities that were bombed in Germany.  Let's get a sample of weather observations representing a random day chosen each month at each city location for the last 3 years before the first bombing mission.

In [15]:
print(bombing_stations.DefCity.unique())

['Wilhelmshaven' 'Ruhr' 'Berlin' 'Emden' 'Berlin ' 'Aachen' 'Essen'
 'Cologne' 'Lubeck' 'Hamburg' 'Augsburg' 'Norwich' 'Rostock' 'Bremen'
 'Karlsruhe' 'Frankfurt' 'Dortmund ' 'Munster' 'Regensburg'
 'Friedrichshafen' 'Rome' 'Greifswald' 'Schweinfurt' 'Leipzig' 'Kiel'
 'Merseburg' 'Dresden' 'Dortmund' 'Bielefeld ' 'Heildeshime' 'Remagen'
 'Oppenheim']


In [119]:
# First, create a dataframe that will hold each observation.
bombing_stations = pd.read_csv("Bombing_Weather_Data.csv")
#pre_bombing = pd.DataFrame({"Success":"","Month":"","Day":"","Year":"","Country":"","DefCity":"","DefCountry":"",
#                            "Latitude":"","Longitude":"","StationIDs":"","MinTemp":"","MaxTemp":"","MeanTemp":"",
#                            "MinAirTemp":"","SunDuration":"","MeanCloudCover":"","MeanCloudVapor":"",
#                            "MeanRelHumid":"","PrecipHeight":"","PrecipForm":"","MeanPressure":"","SnowDepth":""})
#pre_bombing = pd.DataFrame()
columns = ["Unnamed: 0","Success","Month","Day","Year","Country","DefCity","DefCountry","Latitude","Longitude","StationIDs",
           "MinTemp","MaxTemp","MeanTemp","MinAirTemp","SunDuration","MeanCloudCover","MeanCloudVapor",
           "MeanRelHumid","PrecipHeight","PrecipForm","MeanPressure","SnowDepth"]
pre_bombing = pd.DataFrame(columns = columns, index= range(1152))
pre_bombing["Country"]= "Germany"
pre_bombing["DefCountry"] = "Germany"

In [128]:
# Now we'll generate a list of a random day in each month for each year for each city.  We'll also need to identify the
# lat/long and StationIDs for those cities which we can pull directly from the bombing_stations dataframe.

import random
i = 0
for city in bombing_stations.DefCity.unique():
    # Generate random dates
    days1937 = random.sample(range(1,28),12)
    days1938 = random.sample(range(1,28),12)
    days1939 = random.sample(range(1,28),12)
    days = days1937 + days1938 + days1939
    months = [8,9,10,11,12,1,2,3,4,5,6,7]*3
    years = [1937]*12 + [1938]*12 + [1939]*12
    
    # Assign values to the correct columns.  We need the next 36 rows to be the same, so iloc takes on what row
    # we're currently on, i, and goes all the way up to i+36.  The second value in iloc corresponds to the
    # column index we're interested in.
    pre_bombing.iloc[i:i+36,2] = months
    pre_bombing.iloc[i:i+36,3] = days
    pre_bombing.iloc[i:i+36,4] = years
    pre_bombing.iloc[i:i+36,6] = city
    # This part's tricky.  We need the lat/long and StationIDs for the city.  We'll look in bombing_stations where
    # the DefCity is equal to the city we're working on, then extract the first element [0] from the list of results.
    pre_bombing.iloc[i:i+36,8] = list(bombing_stations["Latitude"][bombing_stations["DefCity"]==city])[0]
    pre_bombing.iloc[i:i+36,9] = list(bombing_stations["Longitude"][bombing_stations["DefCity"]==city])[0]
    pre_bombing.iloc[i:i+36,10] = list(bombing_stations["StationIDs"][bombing_stations["DefCity"]==city])[0]
    i+=36

In [129]:
# The result is a dataframe that contains everything we need to query the database.
pre_bombing.head()

Unnamed: 0.1,Unnamed: 0,Success,Month,Day,Year,Country,DefCity,DefCountry,Latitude,Longitude,StationIDs,MinTemp,MaxTemp,MeanTemp,MinAirTemp,SunDuration,MeanCloudCover,MeanCloudVapor,MeanRelHumid,PrecipHeight,PrecipForm,MeanPressure,SnowDepth
0,,,8,27,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",,,,,,,,,,,,
1,,,9,14,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",,,,,,,,,,,,
2,,,10,20,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",,,,,,,,,,,,
3,,,11,4,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",,,,,,,,,,,,
4,,,12,7,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",,,,,,,,,,,,


#### Now query the database with the new dataframe
There are 1152 observations.  This will take several hours to run.  So once it completes successfully, save the output to a csv and NEVER RUN THIS AGAIN!

In [130]:
main = """SELECT MinTemp MinTemp, MaxTemp MaxTemp, MeanTemp MeanTemp, MinAirTemp MinAirTemp,
    SunDuration SunDuration, MeanCloudCover MeanCloudCover, MeanCloudVapor MeanCloudVapor,
    MeanRelHumid MeanRelHumid, PrecipHeight PrecipHeight, PrecipForm PrecipForm,
    MeanPressure MeanPressure, SnowDepth SnowDepth
    FROM `ml-jth.germany.weather`
    WHERE date = DATE("""
print("Collecting data...")
i= 0
for index, row in pre_bombing.iterrows():
    date= str(row.Year) + "-" + str(row.Month) + "-" + str(row.Day)
    print(i, index, date)
    query =  main + '"{}"'.format(date) + ") AND StationID IN " + "("+ str(row.StationIDs)[1:-1] + ")"
    weather = GetWeather(query)
    pre_bombing.iloc[i,11] = weather["MinTemp"]
    pre_bombing.iloc[i,12] = weather["MaxTemp"]
    pre_bombing.iloc[i,13] = weather["MeanTemp"]
    pre_bombing.iloc[i,14] = weather["MinAirTemp"]
    pre_bombing.iloc[i,15] = weather["SunDuration"]
    pre_bombing.iloc[i,16] = weather["MeanCloudCover"]
    pre_bombing.iloc[i,17] = weather["MeanCloudVapor"]
    pre_bombing.iloc[i,18] = weather["MeanRelHumid"]
    pre_bombing.iloc[i,19] = weather["PrecipHeight"]
    pre_bombing.iloc[i,20] = weather["PrecipForm"]
    pre_bombing.iloc[i,21] = weather["MeanPressure"]
    pre_bombing.iloc[i,22] = weather["SnowDepth"]
    i += 1
print("Complete")

Collecting data...
0 0 1937-8-27
1 1 1937-9-14
2 2 1937-10-20
3 3 1937-11-4
4 4 1937-12-7
5 5 1937-1-3
6 6 1937-2-11
7 7 1937-3-17
8 8 1937-4-22
9 9 1937-5-16
10 10 1937-6-26
11 11 1937-7-13
12 12 1938-8-4
13 13 1938-9-20
14 14 1938-10-22
15 15 1938-11-26
16 16 1938-12-14
17 17 1938-1-6
18 18 1938-2-10
19 19 1938-3-27
20 20 1938-4-7
21 21 1938-5-19
22 22 1938-6-21
23 23 1938-7-15
24 24 1939-8-15
25 25 1939-9-16
26 26 1939-10-27
27 27 1939-11-12
28 28 1939-12-4
29 29 1939-1-11
30 30 1939-2-23
31 31 1939-3-6
32 32 1939-4-5
33 33 1939-5-19
34 34 1939-6-25
35 35 1939-7-9
36 36 1937-8-26
37 37 1937-9-1
38 38 1937-10-7
39 39 1937-11-15
40 40 1937-12-27
41 41 1937-1-4
42 42 1937-2-17
43 43 1937-3-13
44 44 1937-4-22
45 45 1937-5-2
46 46 1937-6-9
47 47 1937-7-21
48 48 1938-8-26
49 49 1938-9-7
50 50 1938-10-15
51 51 1938-11-5
52 52 1938-12-25
53 53 1938-1-10
54 54 1938-2-2
55 55 1938-3-13
56 56 1938-4-20
57 57 1938-5-9
58 58 1938-6-4
59 59 1938-7-8
60 60 1939-8-18
61 61 1939-9-11
62 62 1939-10-1

469 469 1937-9-7
470 470 1937-10-8
471 471 1937-11-26
472 472 1937-12-1
473 473 1937-1-11
474 474 1937-2-6
475 475 1937-3-21
476 476 1937-4-15
477 477 1937-5-9
478 478 1937-6-24
479 479 1937-7-14
480 480 1938-8-26
481 481 1938-9-23
482 482 1938-10-25
483 483 1938-11-24
484 484 1938-12-3
485 485 1938-1-16
486 486 1938-2-11
487 487 1938-3-18
488 488 1938-4-4
489 489 1938-5-7
490 490 1938-6-20
491 491 1938-7-2
492 492 1939-8-3
493 493 1939-9-13
494 494 1939-10-2
495 495 1939-11-8
496 496 1939-12-22
497 497 1939-1-12
498 498 1939-2-5
499 499 1939-3-24
500 500 1939-4-11
501 501 1939-5-26
502 502 1939-6-9
503 503 1939-7-6
504 504 1937-8-11
505 505 1937-9-21
506 506 1937-10-22
507 507 1937-11-19
508 508 1937-12-27
509 509 1937-1-23
510 510 1937-2-8
511 511 1937-3-16
512 512 1937-4-15
513 513 1937-5-9
514 514 1937-6-26
515 515 1937-7-13
516 516 1938-8-13
517 517 1938-9-20
518 518 1938-10-26
519 519 1938-11-6
520 520 1938-12-7
521 521 1938-1-11
522 522 1938-2-5
523 523 1938-3-1
524 524 1938-4-1

927 927 1939-11-17
928 928 1939-12-24
929 929 1939-1-3
930 930 1939-2-20
931 931 1939-3-5
932 932 1939-4-21
933 933 1939-5-11
934 934 1939-6-10
935 935 1939-7-12
936 936 1937-8-27
937 937 1937-9-1
938 938 1937-10-14
939 939 1937-11-20
940 940 1937-12-3
941 941 1937-1-17
942 942 1937-2-2
943 943 1937-3-21
944 944 1937-4-16
945 945 1937-5-6
946 946 1937-6-11
947 947 1937-7-8
948 948 1938-8-2
949 949 1938-9-22
950 950 1938-10-18
951 951 1938-11-21
952 952 1938-12-26
953 953 1938-1-19
954 954 1938-2-14
955 955 1938-3-5
956 956 1938-4-17
957 957 1938-5-20
958 958 1938-6-9
959 959 1938-7-1
960 960 1939-8-17
961 961 1939-9-4
962 962 1939-10-21
963 963 1939-11-16
964 964 1939-12-19
965 965 1939-1-22
966 966 1939-2-11
967 967 1939-3-5
968 968 1939-4-13
969 969 1939-5-26
970 970 1939-6-12
971 971 1939-7-9
972 972 1937-8-27
973 973 1937-9-8
974 974 1937-10-18
975 975 1937-11-4
976 976 1937-12-10
977 977 1937-1-26
978 978 1937-2-23
979 979 1937-3-15
980 980 1937-4-9
981 981 1937-5-13
982 982 1937-

In [131]:
# Now we have a complete dataframe with weather information over the last 3 years before bombing commenced at every
# city that was bombed in Germany during the war.
pre_bombing.head()

Unnamed: 0.1,Unnamed: 0,Success,Month,Day,Year,Country,DefCity,DefCountry,Latitude,Longitude,StationIDs,MinTemp,MaxTemp,MeanTemp,MinAirTemp,SunDuration,MeanCloudCover,MeanCloudVapor,MeanRelHumid,PrecipHeight,PrecipForm,MeanPressure,SnowDepth
0,,,8,27,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",54.344,74.12,63.62,52.0618,7.05,3.98,17.1929,85.0714,1.06,0,1017.81,0.0
1,,,9,14,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",44.864,59.336,51.752,41.36,1.8,6.09333,11.3071,86.2857,7.92667,1,994.518,0.0
2,,,10,20,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",38.12,55.664,44.66,35.1582,5.46667,3.88667,9.19286,89.9286,0.0466667,0,1014.55,0.0
3,,,11,4,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",37.664,49.7,42.272,36.0909,6.21667,3.82667,7.57143,81.9286,0.0,0,1021.45,0.0
4,,,12,7,1937,Germany,Wilhelmshaven,Germany,53.5323,8.10687,"[44, 78, 102, 172, 185, 243, 294, 326, 327, 34...",30.2,36.5,32.672,29.4964,3.78333,5.73333,5.89286,91.6429,0.453333,0,997.145,1.33333


In [132]:
# Export this to a csv and we're done!
pre_bombing.to_csv("Pre_Bombing_Weather_Data.csv")