In [1]:
import pandas as pd

In [2]:
# Load the weather table, then build the timestamp column with vectorised
# pandas calls. (The `date_parser` argument of read_csv is deprecated since
# pandas 2.0; the recommended replacement is to read the column as-is and
# apply pd.to_datetime afterwards.)
weather = pd.read_csv("weather_data.csv")

# What the chain below does, step by step:
# 1. pd.to_datetime(..., utc=True) parses the strings into timezone-aware UTC
#    timestamps (an offset written in the file is honoured; a string without
#    an offset is taken as UTC).
# 2. .dt.tz_convert(None) drops the timezone but keeps the UTC wall-clock time.
# 3. .dt.tz_localize("Etc/GMT+3") re-labels that wall-clock time as UTC-03:00
#    without shifting it.
# 4. .dt.tz_convert("Etc/GMT-7") shifts the values to UTC+07:00, the same zone
#    the crimes timestamps are localized to, so the two tables can be merged.
#
# NOTE(review): "Etc/GMT+N" / "Etc/GMT-N" names use the POSIX sign convention,
# i.e. "Etc/GMT-7" is UTC+07:00 (see the +07:00 in the output below) and
# "Etc/GMT+3" is UTC-03:00. San Francisco local time is normally expressed as
# "America/Los_Angeles" (UTC-08:00 / UTC-07:00) - confirm these zones are intended.
weather["date"] = (
    pd.to_datetime(weather["date"], utc=True)
    .dt.tz_convert(None)
    .dt.tz_localize("Etc/GMT+3")
    .dt.tz_convert("Etc/GMT-7")
)
weather.head()

Unnamed: 0,date,temperature,humidity,weather,wind_speed,wind_direction,pressure
0,2012-10-01 23:00:00+07:00,16.33,88.0,light rain,2.0,150.0,1009.0
1,2012-10-02 00:00:00+07:00,16.324993,87.0,sky is clear,2.0,147.0,1009.0
2,2012-10-02 01:00:00+07:00,16.310618,86.0,sky is clear,2.0,141.0,1009.0
3,2012-10-02 02:00:00+07:00,16.296243,85.0,sky is clear,2.0,135.0,1009.0
4,2012-10-02 03:00:00+07:00,16.281869,84.0,sky is clear,2.0,129.0,1009.0


In [3]:
# Load only the columns we need (extend the list if you need more).
crimes = pd.read_csv("data.csv", usecols=["Category", "Date", "Time", "PdDistrict"])

# Keep only the crime categories of interest (plug in any list of categories).
# .copy() makes the filtered frame independent of the original, so adding a
# column below does not trigger SettingWithCopyWarning.
crimes = crimes[crimes["Category"].isin(['BURGLARY', 'FRAUD'])].copy()

# Build one timestamp per row from the Date ("MM/DD/YYYY") and Time ("HH:MM")
# columns. Everything is done on whole columns at once, which is much faster
# than calling pd.to_datetime row by row through DataFrame.apply:
# 1. concatenate the two string columns into "MM/DD/YYYY HH:MM";
# 2. parse with an explicit format (no per-value guessing, no day/month ambiguity);
# 3. round to the nearest hour (e.g. 21:11 -> 21:00, 21:30 -> 22:00);
# 4. label the result with the "Etc/GMT-7" zone, matching the weather timestamps.
#
# NOTE(review): "Etc/GMT-7" follows the POSIX sign convention and means
# UTC+07:00 (see the +07:00 in the output below); San Francisco local time is
# normally "America/Los_Angeles" - confirm this zone is intended.
crimes["datetime"] = (
    pd.to_datetime(crimes["Date"] + " " + crimes["Time"], format="%m/%d/%Y %H:%M")
    .dt.round(pd.offsets.Hour())
    .dt.tz_localize("Etc/GMT-7")
)

# The result is stored in the new "datetime" column;
# now you can merge the two datasets on their timestamp columns.
crimes.head()

Unnamed: 0,Category,Date,Time,PdDistrict,datetime
5,FRAUD,10/08/2013,21:11,PARK,2013-10-08 21:00:00+07:00
12,BURGLARY,01/08/2003,16:00,BAYVIEW,2003-01-08 16:00:00+07:00
47,BURGLARY,01/05/2006,21:30,RICHMOND,2006-01-05 22:00:00+07:00
53,FRAUD,04/15/2010,15:13,SOUTHERN,2010-04-15 15:00:00+07:00
68,BURGLARY,01/25/2013,07:45,PARK,2013-01-25 08:00:00+07:00
