In [1]:
import os
from datetime import datetime
from site import check_enableusersite

import numpy as np
import pandas as pd
import requests
from sodapy import Socrata

### Set up API

In [2]:
# Read the Socrata app token from the environment rather than committing it to
# the notebook. The token that used to be hardcoded here should be treated as
# leaked and rotated. When SOCRATA_APP_TOKEN is unset, None is passed and the
# client makes unauthenticated requests.
token = os.environ.get("SOCRATA_APP_TOKEN")
client = Socrata(
    "data.cityofchicago.org",
    token,
    # Very large timeout so the long paging requests below are not cut off
    # (presumably seconds -- confirm against the sodapy documentation).
    timeout=1000000,
)

### 2018 Rideshare

In [3]:
# SoQL filter: trips that started during 2018 and have both community areas set
where2018 = (
    "trip_start_timestamp<'2019-01-01T00:00:00.000'"
    " AND trip_start_timestamp>='2018-01-01T00:00:00.000'"
    " AND pickup_community_area IS NOT NULL"
    " AND dropoff_community_area IS NOT NULL"
)

# Ask the API how many rows match the 2018 filter
record_count2018 = client.get("m6dm-c72p", select="COUNT(*)", where=where2018)

In [4]:
# Record count of 2018
record_count2018

[{'COUNT': '15276428'}]

In [5]:
# Paging through 2018 data with 1,000,000 rows per request
# Takes about 1hr
total_2018 = int(record_count2018[0]["COUNT"])
chunk_size = 1000000
results = []
# range() stops before the total, so no extra (empty) request is issued when
# the row count happens to be an exact multiple of chunk_size.
for start in range(0, total_2018, chunk_size):
    results.extend(
        client.get(
            "m6dm-c72p",
            select="trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location",
            where=where2018,
            # A fixed sort order is required for offset paging: without it the
            # API may return rows in a different order on each request, so
            # pages can overlap or skip rows.
            order=":id",
            offset=start,
            limit=chunk_size,
        )
    )
    print("chunk: " + str(start))

rideshare2018 = pd.DataFrame.from_records(results)

chunk: 0
chunk: 1000000
chunk: 2000000
chunk: 3000000
chunk: 4000000
chunk: 5000000
chunk: 6000000
chunk: 7000000
chunk: 8000000
chunk: 9000000
chunk: 10000000
chunk: 11000000
chunk: 12000000
chunk: 13000000
chunk: 14000000
chunk: 15000000


In [6]:
# Change trip_start_timestamp to datetime variable
rideshare2018["trip_start_timestamp"] = pd.to_datetime(rideshare2018["trip_start_timestamp"])

# Create date variable which is only the date (remove specific time).
# The column was parsed just above, so it is not parsed a second time here.
rideshare2018["date"] = rideshare2018["trip_start_timestamp"].dt.strftime("%Y-%m-%d")

# Group by date and randomly sample 0.1% of each day's trips.
# random_state makes the sample reproducible across runs; 1234 is the same
# seed value the monthly reduce_data helper uses.
rideshare2018_reduced = rideshare2018.groupby("date").sample(frac=0.001, random_state=1234)

# rideshare2018_reduced.to_csv("~/ids705/705-FinalProject/data/rideshare2018_reduced.csv")

### Jan 2019 - Jan 2020 Rideshare

In [7]:
def month_filter(year, month):
    """Build the SoQL where clause that selects one calendar month of trips.

    year: calendar year (int).
    month: calendar month, 1-12 (int).

    Returns a string that keeps trips whose trip_start_timestamp is on or after
    the first day of the month and before the first day of the next month, and
    whose pickup and dropoff community areas are both present.
    """
    month_start = datetime(year, month, 1)
    # December rolls over into January of the following year
    next_start = datetime(year + month // 12, month % 12 + 1, 1)
    return (
        f"trip_start_timestamp>='{month_start:%Y-%m-%d}T00:00:00.000'"
        f" AND trip_start_timestamp<'{next_start:%Y-%m-%d}T00:00:00.000'"
        " AND pickup_community_area IS NOT NULL"
        " AND dropoff_community_area IS NOT NULL"
    )


# where_filter for each month, Jan 2019 through Jan 2020, generated instead of
# hand-copied so the date boundaries cannot drift out of sync.
# Added to a list for convenience if running the function later in a loop (not advised, takes too long).
# The list keeps its name (it holds filters, not counts) because later cells use it.
year2019_count = [month_filter(2019, m) for m in range(1, 13)] + [month_filter(2020, 1)]

# Individual names used by the per-month cells further down
(
    where2019jan,
    where2019feb,
    where2019mar,
    where2019apr,
    where2019may,
    where2019jun,
    where2019jul,
    where2019aug,
    where2019sep,
    where2019oct,
    where2019nov,
    where2019dec,
    where2020jan,
) = year2019_count

# List of counts for each month, in the same order as year2019_count
counts_bymonth = [
    client.get(
        "m6dm-c72p",
        select="COUNT(*)",
        where=month_where,
    )
    for month_where in year2019_count
]


In [8]:
def get_count(where_filter):
    """Ask the API how many rows match a filter.

    where_filter: filter condition for one month (string).

    Returns the raw API response, in the format '[{'COUNT': 'XXXXXXX'}]'.
    """
    query = {"select": "COUNT(*)", "where": where_filter}
    return client.get("m6dm-c72p", **query)

In [9]:
def paging_data(where_filter, count, chunk_size=1000000):
    """Download every row matching a filter, one page at a time.

    where_filter: filter condition (string).
    count: output from get_count function, i.e. '[{'COUNT': 'XXXXXXX'}]'.
    chunk_size: rows requested per page. Defaults to 1,000,000; lower it if
        that is too large (roughly 500,000 - 1mil would be ideal).

    Returns a dataframe with the rows of the selected month.
    Raises ValueError if chunk_size is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    total = int(count[0]["COUNT"])
    results = []
    start = 0
    # Loop while the offset is still inside the data, so an exact multiple of
    # chunk_size does not trigger one more (empty) request.
    while start < total:
        page = client.get(
            "m6dm-c72p",
            select="trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location",
            where=where_filter,
            # Offset paging needs a fixed sort order; without it rows can be
            # returned in a different order per request and pages may overlap
            # or skip rows.
            order=":id",
            offset=start,
            limit=chunk_size,
        )
        results.extend(page)
        start += chunk_size
        # Progress marker: the offset the next request would start at
        print("chunk: " + str(start))
        if not page:
            # The count was larger than what the API now returns; stop early
            break
    return pd.DataFrame.from_records(results)

In [11]:
# Paging through data for each month
# Took 12hrs and did not finish so try not to use this
# Each month is downloaded first and everything is concatenated once at the
# end: calling pd.concat inside the loop re-copies all rows gathered so far on
# every iteration. This also removes the dependency on rideshare2018.columns.
monthly_frames = [
    paging_data(month_where, month_count)
    for month_where, month_count in zip(year2019_count, counts_bymonth)
]
rideshare2019 = pd.concat(monthly_frames, axis=0)

KeyboardInterrupt: 

In [12]:
def reduce_data(df, frac=0.001, seed=1234):
    """Take the output dataframe from paging_data and sample a fraction of every day.

    df: dataframe with a 'trip_start_timestamp' column (strings or datetimes).
    frac: fraction of each day's rows to keep. Defaults to 0.001 (0.1%).
    seed: seed for the sampler. Defaults to 1234, which draws the same rows as
        seeding the global NumPy generator with 1234 did.

    Returns a new dataframe (trip_start_timestamp parsed to datetime, plus a
    'date' column formatted as YYYY-MM-DD) which can then be concatenated with
    other dataframes and written to .csv. The input dataframe is left
    unchanged and the global NumPy random state is not touched.
    """
    start_ts = pd.to_datetime(df["trip_start_timestamp"])
    day = start_ts.dt.strftime("%Y-%m-%d")
    # assign() works on a copy, so the caller's frame does not gain columns
    prepared = df.assign(trip_start_timestamp=start_ts, date=day)
    # random_state keeps the seeding local instead of reseeding np.random
    df_reduced = prepared.groupby("date").sample(frac=frac, random_state=seed)
    return df_reduced

### Example for paging through Jan 2019 data

In [None]:
# Download January 2019, thin it to the per-day sample, then save the sample.
df2019jan = paging_data(
    where_filter=where2019jan,
    count=get_count(where_filter=where2019jan),
)
df2019jan_reduced = reduce_data(df=df2019jan)
# NOTE(review): "path" looks like a placeholder for the real output file -- confirm
df2019jan_reduced.to_csv(path_or_buf="path")

In [13]:
# Pull every July 2019 trip; the row count tells paging_data when to stop
df2019jul = paging_data(
    where_filter=where2019jul,
    count=get_count(where_filter=where2019jul),
)


chunk: 1000000
chunk: 2000000
chunk: 3000000
chunk: 4000000
chunk: 5000000
chunk: 6000000
chunk: 7000000
chunk: 8000000


In [14]:
# Keep the per-day random sample of the July download
df2019jul_reduced = reduce_data(df=df2019jul)


In [19]:
# Save the July sample; the row index is written as the first, unnamed column
df2019jul_reduced.to_csv(path_or_buf="jul.csv")

In [16]:
# Pull every August 2019 trip; the row count tells paging_data when to stop
df2019aug = paging_data(
    where_filter=where2019aug,
    count=get_count(where_filter=where2019aug),
)


chunk: 1000000
chunk: 2000000
chunk: 3000000
chunk: 4000000
chunk: 5000000
chunk: 6000000
chunk: 7000000
chunk: 8000000
chunk: 9000000


In [17]:
# Keep the per-day random sample of the August download
df2019aug_reduced = reduce_data(df=df2019aug)


In [20]:
# Save the August sample; the row index is written as the first, unnamed column
df2019aug_reduced.to_csv(path_or_buf="aug.csv")

In [21]:
# Pull every September 2019 trip; the row count tells paging_data when to stop
df2019sep = paging_data(
    where_filter=where2019sep,
    count=get_count(where_filter=where2019sep),
)


chunk: 1000000
chunk: 2000000
chunk: 3000000
chunk: 4000000
chunk: 5000000
chunk: 6000000
chunk: 7000000
chunk: 8000000


In [22]:
# Keep the per-day random sample of the September download
df2019sep_reduced = reduce_data(df=df2019sep)


In [23]:
# Save the September sample; the row index is written as the first, unnamed column
df2019sep_reduced.to_csv(path_or_buf="sep.csv")

In [24]:
import pandas as pd

In [26]:
# Reload the saved August sample and preview its first rows
aug = pd.read_csv(filepath_or_buffer="./aug.csv")
aug.head(5)

Unnamed: 0.1,Unnamed: 0,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location,date
0,229556,2019-08-01 21:30:00,2019-08-01T22:00:00.000,2123,18.07364,76,8,22.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.61886835...",2019-08-01
1,108786,2019-08-01 13:15:00,2019-08-01T14:15:00.000,3512,16.384596,76,28,37.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.64280846...",2019-08-01
2,157951,2019-08-01 17:00:00,2019-08-01T17:15:00.000,1321,8.392442,75,46,15.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.67125035...","{'type': 'Point', 'coordinates': [-87.57260730...",2019-08-01
3,97535,2019-08-01 12:30:00,2019-08-01T13:00:00.000,1865,8.831748,77,24,10.0,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.66418824...","{'type': 'Point', 'coordinates': [-87.66533765...",2019-08-01
4,229507,2019-08-01 21:30:00,2019-08-01T21:45:00.000,869,1.96123,8,32,7.5,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63576009...","{'type': 'Point', 'coordinates': [-87.62099291...",2019-08-01


In [27]:
# Reload the saved July sample and preview its first rows
jul = pd.read_csv(filepath_or_buffer="./jul.csv")
jul.head(5)

Unnamed: 0.1,Unnamed: 0,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location,date
0,119893,2019-07-01 15:45:00,2019-07-01T16:00:00.000,1517,14.603581,49,8,30.0,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.62336651...","{'type': 'Point', 'coordinates': [-87.63330803...",2019-07-01
1,100423,2019-07-01 14:00:00,2019-07-01T14:15:00.000,1120,6.951304,3,8,10.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.65587878...","{'type': 'Point', 'coordinates': [-87.63330803...",2019-07-01
2,172401,2019-07-01 20:15:00,2019-07-01T20:30:00.000,528,1.992832,28,8,5.0,1,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.64295866...","{'type': 'Point', 'coordinates': [-87.63186394...",2019-07-01
3,82329,2019-07-01 12:00:00,2019-07-01T12:15:00.000,980,4.968946,77,5,7.5,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.66341640...","{'type': 'Point', 'coordinates': [-87.68383494...",2019-07-01
4,148706,2019-07-01 18:15:00,2019-07-01T18:45:00.000,1440,5.776235,32,7,0.0,0,0.72,True,2,"{'type': 'Point', 'coordinates': [-87.63274648...","{'type': 'Point', 'coordinates': [-87.67380723...",2019-07-01


In [32]:
# Reload the saved September sample and preview its first rows
sep = pd.read_csv(filepath_or_buffer="./sep.csv")
sep.head(5)

Unnamed: 0.1,Unnamed: 0,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location,date
0,208531,2019-09-01 19:15:00,2019-09-01T19:30:00.000,1035,4.27046,6,16,10.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.65680390...","{'type': 'Point', 'coordinates': [-87.73089878...",2019-09-01
1,55406,2019-09-01 05:00:00,2019-09-01T05:15:00.000,849,5.485975,6,24,15.0,0,0.0,True,3,"{'type': 'Point', 'coordinates': [-87.65680390...","{'type': 'Point', 'coordinates': [-87.66474583...",2019-09-01
2,97464,2019-09-01 11:30:00,2019-09-01T11:45:00.000,993,4.817939,33,24,7.5,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.61735800...","{'type': 'Point', 'coordinates': [-87.67094507...",2019-09-01
3,45640,2019-09-01 03:15:00,2019-09-01T03:30:00.000,980,4.683007,8,6,10.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.62621490...","{'type': 'Point', 'coordinates': [-87.64302280...",2019-09-01
4,107204,2019-09-01 12:15:00,2019-09-01T12:30:00.000,461,1.514134,8,8,5.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63784420...","{'type': 'Point', 'coordinates': [-87.62665890...",2019-09-01


In [33]:
# Stack the three monthly samples row-wise (August, July, September, in that
# order). Each frame keeps its own row labels, so labels repeat in the result.
new = pd.concat(objs=[aug, jul, sep])
new.head(5)

Unnamed: 0.1,Unnamed: 0,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location,date
0,229556,2019-08-01 21:30:00,2019-08-01T22:00:00.000,2123,18.07364,76,8,22.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.61886835...",2019-08-01
1,108786,2019-08-01 13:15:00,2019-08-01T14:15:00.000,3512,16.384596,76,28,37.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.64280846...",2019-08-01
2,157951,2019-08-01 17:00:00,2019-08-01T17:15:00.000,1321,8.392442,75,46,15.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.67125035...","{'type': 'Point', 'coordinates': [-87.57260730...",2019-08-01
3,97535,2019-08-01 12:30:00,2019-08-01T13:00:00.000,1865,8.831748,77,24,10.0,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.66418824...","{'type': 'Point', 'coordinates': [-87.66533765...",2019-08-01
4,229507,2019-08-01 21:30:00,2019-08-01T21:45:00.000,869,1.96123,8,32,7.5,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63576009...","{'type': 'Point', 'coordinates': [-87.62099291...",2019-08-01


In [34]:
new.columns

Index(['Unnamed: 0', 'trip_start_timestamp', 'trip_end_timestamp',
       'trip_seconds', 'trip_miles', 'pickup_community_area',
       'dropoff_community_area', 'fare', 'tip', 'additional_charges',
       'shared_trip_authorized', 'trips_pooled', 'pickup_centroid_location',
       'dropoff_centroid_location', 'date'],
      dtype='object')

In [35]:
# 'Unnamed: 0' is the old row index that came back as a column from the CSV
# files; build a copy of the combined frame without it.
new1 = new.drop(columns="Unnamed: 0")
new1

Unnamed: 0,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location,date
0,2019-08-01 21:30:00,2019-08-01T22:00:00.000,2123,18.073640,76,8,22.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.61886835...",2019-08-01
1,2019-08-01 13:15:00,2019-08-01T14:15:00.000,3512,16.384596,76,28,37.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.64280846...",2019-08-01
2,2019-08-01 17:00:00,2019-08-01T17:15:00.000,1321,8.392442,75,46,15.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.67125035...","{'type': 'Point', 'coordinates': [-87.57260730...",2019-08-01
3,2019-08-01 12:30:00,2019-08-01T13:00:00.000,1865,8.831748,77,24,10.0,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.66418824...","{'type': 'Point', 'coordinates': [-87.66533765...",2019-08-01
4,2019-08-01 21:30:00,2019-08-01T21:45:00.000,869,1.961230,8,32,7.5,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63576009...","{'type': 'Point', 'coordinates': [-87.62099291...",2019-08-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7652,2019-09-30 14:45:00,2019-09-30T15:00:00.000,705,4.704218,42,38,10.0,3,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.57994824...","{'type': 'Point', 'coordinates': [-87.60894927...",2019-09-30
7653,2019-09-30 01:15:00,2019-09-30T01:30:00.000,657,5.676132,33,41,10.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.62033462...","{'type': 'Point', 'coordinates': [-87.59231085...",2019-09-30
7654,2019-09-30 14:15:00,2019-09-30T14:30:00.000,700,2.411363,8,8,7.5,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63576009...","{'type': 'Point', 'coordinates': [-87.61886835...",2019-09-30
7655,2019-09-30 20:45:00,2019-09-30T21:15:00.000,1759,14.830000,76,77,22.5,6,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.91362459...","{'type': 'Point', 'coordinates': [-87.66341640...",2019-09-30


In [39]:
# new.reset_index(inplace=True)

ValueError: cannot insert level_0, already exists

In [46]:
new1.head()

Unnamed: 0,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location,date
0,2019-08-01 21:30:00,2019-08-01T22:00:00.000,2123,18.07364,76,8,22.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.61886835...",2019-08-01
1,2019-08-01 13:15:00,2019-08-01T14:15:00.000,3512,16.384596,76,28,37.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.64280846...",2019-08-01
2,2019-08-01 17:00:00,2019-08-01T17:15:00.000,1321,8.392442,75,46,15.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.67125035...","{'type': 'Point', 'coordinates': [-87.57260730...",2019-08-01
3,2019-08-01 12:30:00,2019-08-01T13:00:00.000,1865,8.831748,77,24,10.0,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.66418824...","{'type': 'Point', 'coordinates': [-87.66533765...",2019-08-01
4,2019-08-01 21:30:00,2019-08-01T21:45:00.000,869,1.96123,8,32,7.5,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63576009...","{'type': 'Point', 'coordinates': [-87.62099291...",2019-08-01


In [48]:
# Renumber the rows 0..n-1; the previous labels are kept in a new 'index' column
new1.reset_index(drop=False, inplace=True)

In [49]:
new1.tail()

Unnamed: 0,index,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location,date
23650,7652,2019-09-30 14:45:00,2019-09-30T15:00:00.000,705,4.704218,42,38,10.0,3,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.57994824...","{'type': 'Point', 'coordinates': [-87.60894927...",2019-09-30
23651,7653,2019-09-30 01:15:00,2019-09-30T01:30:00.000,657,5.676132,33,41,10.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.62033462...","{'type': 'Point', 'coordinates': [-87.59231085...",2019-09-30
23652,7654,2019-09-30 14:15:00,2019-09-30T14:30:00.000,700,2.411363,8,8,7.5,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63576009...","{'type': 'Point', 'coordinates': [-87.61886835...",2019-09-30
23653,7655,2019-09-30 20:45:00,2019-09-30T21:15:00.000,1759,14.83,76,77,22.5,6,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.91362459...","{'type': 'Point', 'coordinates': [-87.66341640...",2019-09-30
23654,7656,2019-09-30 09:45:00,2019-09-30T09:45:00.000,716,1.611987,8,32,7.5,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63186394...","{'type': 'Point', 'coordinates': [-87.62217293...",2019-09-30


In [50]:
# Discard the 'index' column that reset_index created
new1.drop(columns="index", inplace=True)

In [51]:
new1.tail()

Unnamed: 0,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location,date
23650,2019-09-30 14:45:00,2019-09-30T15:00:00.000,705,4.704218,42,38,10.0,3,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.57994824...","{'type': 'Point', 'coordinates': [-87.60894927...",2019-09-30
23651,2019-09-30 01:15:00,2019-09-30T01:30:00.000,657,5.676132,33,41,10.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.62033462...","{'type': 'Point', 'coordinates': [-87.59231085...",2019-09-30
23652,2019-09-30 14:15:00,2019-09-30T14:30:00.000,700,2.411363,8,8,7.5,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63576009...","{'type': 'Point', 'coordinates': [-87.61886835...",2019-09-30
23653,2019-09-30 20:45:00,2019-09-30T21:15:00.000,1759,14.83,76,77,22.5,6,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.91362459...","{'type': 'Point', 'coordinates': [-87.66341640...",2019-09-30
23654,2019-09-30 09:45:00,2019-09-30T09:45:00.000,716,1.611987,8,32,7.5,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63186394...","{'type': 'Point', 'coordinates': [-87.62217293...",2019-09-30


In [43]:
# Remove leftover helper columns from `new`. 'level_0' and 'index' only exist
# if reset_index was run on `new` in an earlier session, so errors="ignore"
# lets this cell run on a fresh kernel (dropping just 'Unnamed: 0') instead of
# raising KeyError for the columns that are absent.
new.drop(columns=["level_0", "index", "Unnamed: 0"], errors="ignore", inplace=True)

In [44]:
new.head()

Unnamed: 0,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tip,additional_charges,shared_trip_authorized,trips_pooled,pickup_centroid_location,dropoff_centroid_location,date
0,2019-08-01 21:30:00,2019-08-01T22:00:00.000,2123,18.07364,76,8,22.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.61886835...",2019-08-01
1,2019-08-01 13:15:00,2019-08-01T14:15:00.000,3512,16.384596,76,28,37.5,0,7.55,False,1,"{'type': 'Point', 'coordinates': [-87.90303966...","{'type': 'Point', 'coordinates': [-87.64280846...",2019-08-01
2,2019-08-01 17:00:00,2019-08-01T17:15:00.000,1321,8.392442,75,46,15.0,0,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.67125035...","{'type': 'Point', 'coordinates': [-87.57260730...",2019-08-01
3,2019-08-01 12:30:00,2019-08-01T13:00:00.000,1865,8.831748,77,24,10.0,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.66418824...","{'type': 'Point', 'coordinates': [-87.66533765...",2019-08-01
4,2019-08-01 21:30:00,2019-08-01T21:45:00.000,869,1.96123,8,32,7.5,2,2.55,False,1,"{'type': 'Point', 'coordinates': [-87.63576009...","{'type': 'Point', 'coordinates': [-87.62099291...",2019-08-01


In [45]:
# Save the combined frame (built from the August, July and September samples);
# row labels are written too and repeat across months.
new.to_csv(path_or_buf="jul_to_aug.csv")

In [52]:
# Save the re-indexed copy of the combined frame
new1.to_csv(path_or_buf="jul_to_aug1.csv")