In [42]:
import pandas as pd
import re

In [2]:
# Notebook-wide display settings: show every column, but only a 10-row preview.
pd.options.display.max_columns = None
pd.options.display.max_rows = 10

In [3]:
# Read the raw farmers-market export in a single pass (low_memory disabled).
data = pd.read_csv(filepath_or_buffer='./data/farmersmarkets.csv', low_memory=False)

In [4]:
# List every column name in the raw frame.
data.columns

Index(['FMID', 'MarketName', 'Website', 'Facebook', 'Twitter', 'Youtube',
       'OtherMedia', 'Street', 'City', 'County', 'State', 'zip',
       'Season1Date 1', 'Season1Time 1', 'Season1Time 2', 'Season1Time 3',
       'Season1Time 4', 'Season1Time 5', 'Season1Time 6', 'Season1Time 7',
       'Season1Time 8', 'Season2Date 1', 'Season2Time 1', 'Season2Time 2',
       'Season2Time 3', 'Season2Time 4', 'Season2Time 5', 'Season2Time 6',
       'Season2Time 7', 'Season2Time 8', 'Season3Date 1', 'Season3Time 1',
       'Season3Time 2', 'Season3Time 3', 'Season3Time 4', 'Season3Time 5',
       'Season3Time 6', 'Season3Time 7', 'Season4Date 1', 'Season4Time 1',
       'Season4Time 2', 'Season4Time 3', 'x', 'y', 'Location', 'Credit', 'WIC',
       'WICcash', 'SFMNP', 'SNAP', 'Organic', 'Bakedgoods', 'Cheese', 'Crafts',
       'Flowers', 'Eggs', 'Seafood', 'Herbs', 'Vegetables', 'Honey', 'Jams',
       'Maple', 'Meat', 'Nursery', 'Nuts', 'Plants', 'Poultry', 'Prepared',
       'Soap', 'Trees',

## Seasons

In [5]:
# Keep the market id together with the first date column of seasons 1 through 4.
seasons = data.loc[:, ['FMID'] + [f'Season{number}Date 1' for number in range(1, 5)]]
seasons

Unnamed: 0,FMID,Season1Date 1,Season2Date 1,Season3Date 1,Season4Date 1
0,1012063,06/08/2016 to 10/12/2016,,,
1,1011871,06/25/2016 to 10/01/2016,,,
2,1011878,05/04/2016 to 10/12/2016,,,
3,1009364,,,,
4,1010691,04/02/2014 to 11/30/2014,,,
...,...,...,...,...,...
8660,1004767,07/04/2014 to 10/24/2014,,,
8661,1000778,06/07/2016 to 10/04/2016,,,
8662,1012380,05/07/2016 to 10/15/2016,,,
8663,1004686,,,,


In [6]:
# Reshape to one row per (market, season date range): melt the four season
# columns into a single SeasonDate column, drop the season label, discard
# missing ranges and renumber the rows from zero.
seasons = (
    pd.melt(
        seasons,
        id_vars=["FMID"],
        var_name="SeasonNum",
        value_name="SeasonDate",
    )
    .loc[:, ['FMID', 'SeasonDate']]
    .dropna()
    .reset_index(drop=True)
)

seasons

Unnamed: 0,FMID,SeasonDate
0,1012063,06/08/2016 to 10/12/2016
1,1011871,06/25/2016 to 10/01/2016
2,1011878,05/04/2016 to 10/12/2016
3,1010691,04/02/2014 to 11/30/2014
4,1002454,July to November
...,...,...
5896,1012508,01/15/2017 to 01/15/2017
5897,1005991,05/01/2015 to
5898,1010118,04/14/2016 to 04/14/2016
5899,1001875,05/28/2016 to 09/24/2016


### Approach #1

In [7]:
# Approach #1: split each range on the separator ' to ' (spaces on both sides).
seasons = seasons.assign(
    SeasonDateList=seasons['SeasonDate'].str.split(pat=' to ')
)
seasons

Unnamed: 0,FMID,SeasonDate,SeasonDateList
0,1012063,06/08/2016 to 10/12/2016,"[06/08/2016, 10/12/2016]"
1,1011871,06/25/2016 to 10/01/2016,"[06/25/2016, 10/01/2016]"
2,1011878,05/04/2016 to 10/12/2016,"[05/04/2016, 10/12/2016]"
3,1010691,04/02/2014 to 11/30/2014,"[04/02/2014, 11/30/2014]"
4,1002454,July to November,"[July, November]"
...,...,...,...
5896,1012508,01/15/2017 to 01/15/2017,"[01/15/2017, 01/15/2017]"
5897,1005991,05/01/2015 to,"[05/01/2015, ]"
5898,1010118,04/14/2016 to 04/14/2016,"[04/14/2016, 04/14/2016]"
5899,1001875,05/28/2016 to 09/24/2016,"[05/28/2016, 09/24/2016]"


In [8]:
# Rows whose split produced more than two pieces.
seasons[seasons['SeasonDateList'].map(len) > 2]

Unnamed: 0,FMID,SeasonDate,SeasonDateList


In [9]:
# Rows whose split produced fewer than two pieces.
seasons[seasons['SeasonDateList'].map(len) < 2]

Unnamed: 0,FMID,SeasonDate,SeasonDateList
58,1004762,10/25/2014 to,[10/25/2014 to]
66,1011322,05/19/2016,[05/19/2016]
79,1004846,05/21/2013 to,[05/21/2013 to]
90,1011910,04/18/2016 to,[04/18/2016 to]
250,1011092,06/07/2016 to,[06/07/2016 to]
...,...,...,...
5286,1005677,07/28/2016 to,[07/28/2016 to]
5303,1010125,12/01/2014 to,[12/01/2014 to]
5391,1011180,06/07/2015,[06/07/2015]
5416,1001403,10/03/2015,[10/03/2015]


In [11]:
# Rows where one of the pieces is exactly a single space.
seasons[seasons['SeasonDateList'].map(lambda parts: ' ' in parts)]

Unnamed: 0,FMID,SeasonDate,SeasonDateList


### Approach #2

In [12]:
# Approach #2: split on the bare token 'to', with no surrounding spaces.
seasons = seasons.assign(
    SeasonDateList=seasons['SeasonDate'].str.split(pat='to')
)
seasons

Unnamed: 0,FMID,SeasonDate,SeasonDateList
0,1012063,06/08/2016 to 10/12/2016,"[06/08/2016 , 10/12/2016]"
1,1011871,06/25/2016 to 10/01/2016,"[06/25/2016 , 10/01/2016]"
2,1011878,05/04/2016 to 10/12/2016,"[05/04/2016 , 10/12/2016]"
3,1010691,04/02/2014 to 11/30/2014,"[04/02/2014 , 11/30/2014]"
4,1002454,July to November,"[July , November]"
...,...,...,...
5896,1012508,01/15/2017 to 01/15/2017,"[01/15/2017 , 01/15/2017]"
5897,1005991,05/01/2015 to,"[05/01/2015 , ]"
5898,1010118,04/14/2016 to 04/14/2016,"[04/14/2016 , 04/14/2016]"
5899,1001875,05/28/2016 to 09/24/2016,"[05/28/2016 , 09/24/2016]"


In [13]:
# Rows whose split produced more than two pieces.
seasons[seasons['SeasonDateList'].map(len) > 2]

Unnamed: 0,FMID,SeasonDate,SeasonDateList
43,1001961,May to October,"[May , Oc, ber]"
53,1004598,May to October,"[May , Oc, ber]"
60,1003469,"June 19, 2012 to October 21, 2012","[June 19, 2012 , Oc, ber 21, 2012]"
102,1002836,April to October,"[April , Oc, ber]"
115,1001100,June to October,"[June , Oc, ber]"
...,...,...,...
5380,1003731,April to October,"[April , Oc, ber]"
5618,1001431,October to December,"[Oc, ber , December]"
5817,1002222,October to November,"[Oc, ber , November]"
5837,1002854,September to October,"[September , Oc, ber]"


In [14]:
# Rows whose split produced fewer than two pieces.
seasons[seasons['SeasonDateList'].map(len) < 2]

Unnamed: 0,FMID,SeasonDate,SeasonDateList
66,1011322,05/19/2016,[05/19/2016]
548,1012171,06/25/2016,[06/25/2016]
807,1002353,05/23/2014,[05/23/2014]
1285,1011887,04/17/2016,[04/17/2016]
1552,1011719,12/05/2015,[12/05/2015]
...,...,...,...
5228,1003961,07/13/2013,[07/13/2013]
5250,1011897,01/30/2016,[01/30/2016]
5391,1011180,06/07/2015,[06/07/2015]
5416,1001403,10/03/2015,[10/03/2015]


In [15]:
# Rows where one of the pieces is exactly a single space.
seasons[seasons['SeasonDateList'].map(lambda parts: ' ' in parts)]

Unnamed: 0,FMID,SeasonDate,SeasonDateList
5400,1008251,06/03/2014 to,"[06/03/2014 , ]"
5465,1000158,12/05/2015 to,"[12/05/2015 , ]"
5512,1010414,03/01/2015 to,"[03/01/2015 , ]"
5516,1010802,11/17/2015 to,"[11/17/2015 , ]"
5537,1009921,10/10/2014 to,"[10/10/2014 , ]"
...,...,...,...
5745,1012351,09/16/2016 to,"[09/16/2016 , ]"
5775,1006826,11/01/2012 to,"[11/01/2012 , ]"
5876,1011004,09/24/2016 to,"[09/24/2016 , ]"
5887,1008842,11/27/2016 to,"[11/27/2016 , ]"


### Approach #3

In [16]:
# Approach #3: split on ' to' (leading space only) and discard empty pieces.
seasons = seasons.assign(
    SeasonDateList=seasons['SeasonDate']
    .str.split(pat=' to')
    .map(lambda parts: list(filter(None, parts)))
)
seasons

Unnamed: 0,FMID,SeasonDate,SeasonDateList
0,1012063,06/08/2016 to 10/12/2016,"[06/08/2016, 10/12/2016]"
1,1011871,06/25/2016 to 10/01/2016,"[06/25/2016, 10/01/2016]"
2,1011878,05/04/2016 to 10/12/2016,"[05/04/2016, 10/12/2016]"
3,1010691,04/02/2014 to 11/30/2014,"[04/02/2014, 11/30/2014]"
4,1002454,July to November,"[July, November]"
...,...,...,...
5896,1012508,01/15/2017 to 01/15/2017,"[01/15/2017, 01/15/2017]"
5897,1005991,05/01/2015 to,"[05/01/2015, ]"
5898,1010118,04/14/2016 to 04/14/2016,"[04/14/2016, 04/14/2016]"
5899,1001875,05/28/2016 to 09/24/2016,"[05/28/2016, 09/24/2016]"


In [17]:
# Rows whose split produced more than two pieces.
seasons[seasons['SeasonDateList'].map(len) > 2]

Unnamed: 0,FMID,SeasonDate,SeasonDateList


In [18]:
# Rows whose split produced fewer than two pieces.
seasons[seasons['SeasonDateList'].map(len) < 2]

Unnamed: 0,FMID,SeasonDate,SeasonDateList
58,1004762,10/25/2014 to,[10/25/2014]
66,1011322,05/19/2016,[05/19/2016]
79,1004846,05/21/2013 to,[05/21/2013]
90,1011910,04/18/2016 to,[04/18/2016]
250,1011092,06/07/2016 to,[06/07/2016]
...,...,...,...
5286,1005677,07/28/2016 to,[07/28/2016]
5303,1010125,12/01/2014 to,[12/01/2014]
5391,1011180,06/07/2015,[06/07/2015]
5416,1001403,10/03/2015,[10/03/2015]


In [19]:
# Rows where one of the pieces is exactly a single space.
seasons[seasons['SeasonDateList'].map(lambda parts: ' ' in parts)]

Unnamed: 0,FMID,SeasonDate,SeasonDateList
5400,1008251,06/03/2014 to,"[06/03/2014, ]"
5465,1000158,12/05/2015 to,"[12/05/2015, ]"
5512,1010414,03/01/2015 to,"[03/01/2015, ]"
5516,1010802,11/17/2015 to,"[11/17/2015, ]"
5537,1009921,10/10/2014 to,"[10/10/2014, ]"
...,...,...,...
5745,1012351,09/16/2016 to,"[09/16/2016, ]"
5775,1006826,11/01/2012 to,"[11/01/2012, ]"
5876,1011004,09/24/2016 to,"[09/24/2016, ]"
5887,1008842,11/27/2016 to,"[11/27/2016, ]"


### Approach #4

In [20]:
# Approach #4: trim each raw value first so a trailing "to" leaves no stray
# blank piece, then split on ' to'.
# Splitting on ' to' keeps the space that follows the separator on the second
# piece (e.g. ' 10/12/2016'). The rendered table does not show it, so every
# piece is stripped too and pieces that end up empty are dropped. This keeps
# the values derived from SeasonDateList free of padding.
seasons['SeasonDate'] = seasons['SeasonDate'].str.strip()
seasons['SeasonDateList'] = [
    [piece.strip() for piece in pieces if piece.strip()]
    for pieces in seasons['SeasonDate'].str.split(' to')
]
seasons

Unnamed: 0,FMID,SeasonDate,SeasonDateList
0,1012063,06/08/2016 to 10/12/2016,"[06/08/2016, 10/12/2016]"
1,1011871,06/25/2016 to 10/01/2016,"[06/25/2016, 10/01/2016]"
2,1011878,05/04/2016 to 10/12/2016,"[05/04/2016, 10/12/2016]"
3,1010691,04/02/2014 to 11/30/2014,"[04/02/2014, 11/30/2014]"
4,1002454,July to November,"[July, November]"
...,...,...,...
5896,1012508,01/15/2017 to 01/15/2017,"[01/15/2017, 01/15/2017]"
5897,1005991,05/01/2015 to,[05/01/2015]
5898,1010118,04/14/2016 to 04/14/2016,"[04/14/2016, 04/14/2016]"
5899,1001875,05/28/2016 to 09/24/2016,"[05/28/2016, 09/24/2016]"


In [21]:
# Rows whose split produced more than two pieces.
seasons[seasons['SeasonDateList'].map(len) > 2]

Unnamed: 0,FMID,SeasonDate,SeasonDateList


In [22]:
# Rows whose split produced fewer than two pieces.
seasons[seasons['SeasonDateList'].map(len) < 2]

Unnamed: 0,FMID,SeasonDate,SeasonDateList
58,1004762,10/25/2014 to,[10/25/2014]
66,1011322,05/19/2016,[05/19/2016]
79,1004846,05/21/2013 to,[05/21/2013]
90,1011910,04/18/2016 to,[04/18/2016]
250,1011092,06/07/2016 to,[06/07/2016]
...,...,...,...
5745,1012351,09/16/2016 to,[09/16/2016]
5775,1006826,11/01/2012 to,[11/01/2012]
5876,1011004,09/24/2016 to,[09/24/2016]
5887,1008842,11/27/2016 to,[11/27/2016]


In [23]:
# Rows where one of the pieces is exactly a single space.
seasons[seasons['SeasonDateList'].map(lambda parts: ' ' in parts)]

Unnamed: 0,FMID,SeasonDate,SeasonDateList


In [24]:
# The first piece is the season start. The second piece is the season end,
# and a range with only one piece reuses that piece as the end.
seasons = seasons.assign(
    SeasonStart=seasons['SeasonDateList'].map(lambda parts: parts[0]),
    SeasonEnd=seasons['SeasonDateList'].map(
        lambda parts: parts[0] if len(parts) <= 1 else parts[1]
    ),
)

In [25]:
# Display the frame, which now includes the SeasonStart and SeasonEnd columns.
seasons

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd
0,1012063,06/08/2016 to 10/12/2016,"[06/08/2016, 10/12/2016]",06/08/2016,10/12/2016
1,1011871,06/25/2016 to 10/01/2016,"[06/25/2016, 10/01/2016]",06/25/2016,10/01/2016
2,1011878,05/04/2016 to 10/12/2016,"[05/04/2016, 10/12/2016]",05/04/2016,10/12/2016
3,1010691,04/02/2014 to 11/30/2014,"[04/02/2014, 11/30/2014]",04/02/2014,11/30/2014
4,1002454,July to November,"[July, November]",July,November
...,...,...,...,...,...
5896,1012508,01/15/2017 to 01/15/2017,"[01/15/2017, 01/15/2017]",01/15/2017,01/15/2017
5897,1005991,05/01/2015 to,[05/01/2015],05/01/2015,05/01/2015
5898,1010118,04/14/2016 to 04/14/2016,"[04/14/2016, 04/14/2016]",04/14/2016,04/14/2016
5899,1001875,05/28/2016 to 09/24/2016,"[05/28/2016, 09/24/2016]",05/28/2016,09/24/2016


In [26]:
# Order the rows by market id so each market's seasons sit together.
seasons = seasons.sort_values(by='FMID', ascending=True)
seasons

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd
1397,1000003,05/31/2014 to 10/31/2014,"[05/31/2014, 10/31/2014]",05/31/2014,10/31/2014
5642,1000008,05/21/2016 to 10/29/2016,"[05/21/2016, 10/29/2016]",05/21/2016,10/29/2016
3123,1000008,05/23/2015 to 11/01/2015,"[05/23/2015, 11/01/2015]",05/23/2015,11/01/2015
5865,1000008,11/05/2016 to 04/29/2017,"[11/05/2016, 04/29/2017]",11/05/2016,04/29/2017
4390,1000009,06/14/2014 to 10/11/2014,"[06/14/2014, 10/11/2014]",06/14/2014,10/11/2014
...,...,...,...,...,...
2266,1012844,05/06/2017 to 10/28/2017,"[05/06/2017, 10/28/2017]",05/06/2017,10/28/2017
3356,1016768,05/07/2016 to 10/01/2016,"[05/07/2016, 10/01/2016]",05/07/2016,10/01/2016
2539,1016770,10/02/2016 to 02/04/2017,"[10/02/2016, 02/04/2017]",10/02/2016,02/04/2017
1577,2000005,01/01/2014 to 12/31/2014,"[01/01/2014, 12/31/2014]",01/01/2014,12/31/2014


In [27]:
# Parse the season end text into timestamps; values that cannot be parsed
# become NaT instead of raising.
# NOTE(review): newer pandas releases may infer one format from the first
# value and coerce differently formatted values to NaT. Confirm against the
# pandas version in use.
seasons = seasons.assign(
    MostRecentOpeningDate=pd.to_datetime(seasons['SeasonEnd'], errors='coerce')
)
seasons

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate
1397,1000003,05/31/2014 to 10/31/2014,"[05/31/2014, 10/31/2014]",05/31/2014,10/31/2014,2014-10-31
5642,1000008,05/21/2016 to 10/29/2016,"[05/21/2016, 10/29/2016]",05/21/2016,10/29/2016,2016-10-29
3123,1000008,05/23/2015 to 11/01/2015,"[05/23/2015, 11/01/2015]",05/23/2015,11/01/2015,2015-11-01
5865,1000008,11/05/2016 to 04/29/2017,"[11/05/2016, 04/29/2017]",11/05/2016,04/29/2017,2017-04-29
4390,1000009,06/14/2014 to 10/11/2014,"[06/14/2014, 10/11/2014]",06/14/2014,10/11/2014,2014-10-11
...,...,...,...,...,...,...
2266,1012844,05/06/2017 to 10/28/2017,"[05/06/2017, 10/28/2017]",05/06/2017,10/28/2017,2017-10-28
3356,1016768,05/07/2016 to 10/01/2016,"[05/07/2016, 10/01/2016]",05/07/2016,10/01/2016,2016-10-01
2539,1016770,10/02/2016 to 02/04/2017,"[10/02/2016, 02/04/2017]",10/02/2016,02/04/2017,2017-02-04
1577,2000005,01/01/2014 to 12/31/2014,"[01/01/2014, 12/31/2014]",01/01/2014,12/31/2014,2014-12-31


## Validation

In [28]:
# Spot check: all rows for market 1000165.
seasons.query('FMID == 1000165')

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate
4452,1000165,March to November,"[March, November]",March,November,NaT


In [29]:
# Spot check: all rows for market 1000788.
seasons.query('FMID == 1000788')

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate
5135,1000788,"July 9, 2012 to October 29, 2012","[July 9, 2012, October 29, 2012]","July 9, 2012","October 29, 2012",2012-10-29


In [30]:
# Spot check: all rows for market 1000961.
seasons.query('FMID == 1000961')

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate
5114,1000961,05/01/2015 to,[05/01/2015],05/01/2015,05/01/2015,2015-05-01


In [31]:
# Spot check: all rows for market 1001139.
seasons.query('FMID == 1001139')

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate
2196,1001139,"April to Sept 24, 2011","[April, Sept 24, 2011]",April,"Sept 24, 2011",2011-09-24


In [32]:
# One row per market: the latest parsed date across all of its seasons.
last_season = seasons.groupby('FMID', as_index=False)['MostRecentOpeningDate'].max()
last_season

Unnamed: 0,FMID,MostRecentOpeningDate
0,1000003,2014-10-31
1,1000008,2017-04-29
2,1000009,2014-10-11
3,1000010,NaT
4,1000011,NaT
...,...,...
5387,1012844,2017-10-28
5388,1016768,2016-10-01
5389,1016770,2017-02-04
5390,2000005,2014-12-31


## Before & After Comparison - Integrity Constraint (IC) Violation Check

In [35]:
# Before: raw season end values that contain a comma.
seasons[seasons['SeasonEnd'].str.contains(',', regex=False)]

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate
1476,1000107,"May 5, 2012 to Oct 6, 2012","[May 5, 2012, Oct 6, 2012]","May 5, 2012","Oct 6, 2012",2012-10-06
5135,1000788,"July 9, 2012 to October 29, 2012","[July 9, 2012, October 29, 2012]","July 9, 2012","October 29, 2012",2012-10-29
5134,1000789,"July 12, 2012 to October 25, 2012","[July 12, 2012, October 25, 2012]","July 12, 2012","October 25, 2012",2012-10-25
2198,1001137,"April to November 4, 2011","[April, November 4, 2011]",April,"November 4, 2011",2011-11-04
2196,1001139,"April to Sept 24, 2011","[April, Sept 24, 2011]",April,"Sept 24, 2011",2011-09-24
...,...,...,...,...,...,...
4306,1005772,"May 7, 2011 to October 15, 2011","[May 7, 2011, October 15, 2011]","May 7, 2011","October 15, 2011",2011-10-15
2130,1005993,"May 25, 2012 to August 31, 2012","[May 25, 2012, August 31, 2012]","May 25, 2012","August 31, 2012",2012-08-31
1513,1006135,"August 2, 2012 to September 27, 2012","[August 2, 2012, September 27, 2012]","August 2, 2012","September 27, 2012",2012-09-27
1849,1006688,"June 30, 2012 to September 1 , 2012","[June 30, 2012, September 1 , 2012]","June 30, 2012","September 1 , 2012",2012-09-01


In [39]:
# After: parsed dates whose text form still contains a comma.
seasons[seasons['MostRecentOpeningDate'].map(str).str.contains(',', regex=False)]

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate


In [40]:
# Before: raw season end values that contain a slash.
seasons[seasons['SeasonEnd'].str.contains('/', regex=False)]

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate
1397,1000003,05/31/2014 to 10/31/2014,"[05/31/2014, 10/31/2014]",05/31/2014,10/31/2014,2014-10-31
5642,1000008,05/21/2016 to 10/29/2016,"[05/21/2016, 10/29/2016]",05/21/2016,10/29/2016,2016-10-29
3123,1000008,05/23/2015 to 11/01/2015,"[05/23/2015, 11/01/2015]",05/23/2015,11/01/2015,2015-11-01
5865,1000008,11/05/2016 to 04/29/2017,"[11/05/2016, 04/29/2017]",11/05/2016,04/29/2017,2017-04-29
4390,1000009,06/14/2014 to 10/11/2014,"[06/14/2014, 10/11/2014]",06/14/2014,10/11/2014,2014-10-11
...,...,...,...,...,...,...
2266,1012844,05/06/2017 to 10/28/2017,"[05/06/2017, 10/28/2017]",05/06/2017,10/28/2017,2017-10-28
3356,1016768,05/07/2016 to 10/01/2016,"[05/07/2016, 10/01/2016]",05/07/2016,10/01/2016,2016-10-01
2539,1016770,10/02/2016 to 02/04/2017,"[10/02/2016, 02/04/2017]",10/02/2016,02/04/2017,2017-02-04
1577,2000005,01/01/2014 to 12/31/2014,"[01/01/2014, 12/31/2014]",01/01/2014,12/31/2014,2014-12-31


In [41]:
# After: parsed dates whose text form still contains a slash.
seasons[seasons['MostRecentOpeningDate'].map(str).str.contains('/', regex=False)]

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate


In [46]:
# Sanity check of the "contains no digit" predicate on a string that has one.
re.search(r'\d', '1') is None

False

In [48]:
# Rows whose season end text contains no digit at all.
seasons[~seasons['SeasonEnd'].str.contains(r'\d', regex=True)]

Unnamed: 0,FMID,SeasonDate,SeasonDateList,SeasonStart,SeasonEnd,MostRecentOpeningDate
3643,1000010,April to October,"[April, October]",April,October,NaT
1860,1000011,April to October,"[April, October]",April,October,NaT
4629,1000016,May to October,"[May, October]",May,October,NaT
1447,1000022,January to December,"[January, December]",January,December,NaT
483,1000023,January to December,"[January, December]",January,December,NaT
...,...,...,...,...,...,...
769,1007672,May to October,"[May, October]",May,October,NaT
601,1007741,April to October,"[April, October]",April,October,NaT
834,1007752,May to November,"[May, November]",May,November,NaT
4836,1008037,August to August,"[August, August]",August,August,NaT


In [62]:
# Distinct parsed values for the rows whose season end text has no digit.
seasons.loc[
    ~seasons['SeasonEnd'].str.contains(r'\d', regex=True),
    'MostRecentOpeningDate',
].unique()

array(['NaT'], dtype='datetime64[ns]')

## Save Results

In [33]:
# Write the per-market summary to disk without the row index.
last_season.to_csv(path_or_buf='./data/seasons.csv', index=False)