https://www.datacamp.com/courses/manipulating-dataframes-with-pandas
# 1. Slicing & Filtering

In [151]:
import pandas as pd
import numpy as np
from scipy.stats import zscore

In [9]:
election = pd.read_csv('datasets/pennsylvania2012_turnout.csv', index_col='county')
election.head()

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Adams,PA,41973,35.482334,63.112001,Romney,61156,68.632677,27.629667
Allegheny,PA,614671,56.640219,42.18582,Obama,924351,66.497575,14.454399
Armstrong,PA,28322,30.696985,67.901278,Romney,42147,67.19814,37.204293
Beaver,PA,80015,46.032619,52.63763,Romney,115157,69.483401,6.605012
Bedford,PA,21444,22.057452,76.98657,Romney,32189,66.619031,54.929118


### Load `county` as the index so that rows can be selected by label with `.loc[row, column]`

In [11]:
election.loc['Bedford', 'winner']

'Romney'

In [14]:
election.iloc[4, 4]

'Romney'

In [16]:
results = election[['winner', 'total', 'voters']]
results.head()

Unnamed: 0_level_0,winner,total,voters
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Adams,Romney,41973,61156
Allegheny,Obama,614671,924351
Armstrong,Romney,28322,42147
Beaver,Romney,80015,115157
Bedford,Romney,21444,32189


In [23]:
sales = pd.read_csv('datasets/sales/sales.csv',index_col='month')
sales

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52
Jun,205,60.0,55


In [24]:
sales.spam[1:4]

month
Feb    31
Mar    72
Apr    20
Name: spam, dtype: int64

In [25]:
sales.loc[:, 'salt':'spam']

Unnamed: 0_level_0,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1
Jan,12.0,17
Feb,50.0,31
Mar,89.0,72
Apr,87.0,20
May,,52
Jun,60.0,55


### Load `month` as the index so that rows can be selected by label with `.loc[row, column]`

In [27]:
sales.loc['Feb':'Apr', :]

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20


In [28]:
sales.loc['Feb':'May', ['eggs', 'spam']]

Unnamed: 0_level_0,eggs,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1
Feb,110,31
Mar,221,72
Apr,77,20
May,132,52


In [29]:
election.head()

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Adams,PA,41973,35.482334,63.112001,Romney,61156,68.632677,27.629667
Allegheny,PA,614671,56.640219,42.18582,Obama,924351,66.497575,14.454399
Armstrong,PA,28322,30.696985,67.901278,Romney,42147,67.19814,37.204293
Beaver,PA,80015,46.032619,52.63763,Romney,115157,69.483401,6.605012
Bedford,PA,21444,22.057452,76.98657,Romney,32189,66.619031,54.929118


In [30]:
election.loc['Perry':'Potter', :]

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Perry,PA,18240,29.769737,68.591009,Romney,27245,66.948064,38.821272
Philadelphia,PA,653598,85.224251,14.051451,Obama,1099197,59.461407,71.1728
Pike,PA,23164,43.904334,54.882576,Romney,41840,55.363289,10.978242
Potter,PA,7205,26.259542,72.158223,Romney,10913,66.022175,45.898681


### Slice the row labels in reverse order: df.loc['b':'a':-1]

In [33]:
election.loc['Potter':'Perry':-1,:]

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Potter,PA,7205,26.259542,72.158223,Romney,10913,66.022175,45.898681
Pike,PA,23164,43.904334,54.882576,Romney,41840,55.363289,10.978242
Philadelphia,PA,653598,85.224251,14.051451,Obama,1099197,59.461407,71.1728
Perry,PA,18240,29.769737,68.591009,Romney,27245,66.948064,38.821272


In [34]:
election.loc[['Philadelphia', 'Centre', 'Fulton'], ['winner', 'Obama', 'Romney']]

Unnamed: 0_level_0,winner,Obama,Romney
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Philadelphia,Obama,85.224251,14.051451
Centre,Romney,48.948416,48.977486
Fulton,Romney,21.096291,77.748861


# Filtering DataFrames

In [35]:
sales.head()

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52


In [36]:
sales.spam > 60

month
Jan    False
Feb    False
Mar     True
Apr    False
May    False
Jun    False
Name: spam, dtype: bool

In [41]:
sales[(sales.spam >=30)&(sales.eggs < 200)]

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Feb,110,50.0,31
May,132,,52


In [44]:
sales[(sales.salt >= 70)|(sales.spam < 50)]

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20


In [45]:
sales1 = sales.copy()

In [46]:
sales1['bacon'] = [0,0,50,60,70,80]

In [47]:
sales1

Unnamed: 0_level_0,eggs,salt,spam,bacon
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jan,47,12.0,17,0
Feb,110,50.0,31,0
Mar,221,89.0,72,50
Apr,77,87.0,20,60
May,132,,52,70
Jun,205,60.0,55,80


### Return the columns whose values are all non-zero with df.all( ) (NaN is skipped by default, so `salt` is kept)

In [48]:
sales1.loc[:, sales1.all()]

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52
Jun,205,60.0,55


In [49]:
sales1.loc[:, sales1.any()]

Unnamed: 0_level_0,eggs,salt,spam,bacon
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jan,47,12.0,17,0
Feb,110,50.0,31,0
Mar,221,89.0,72,50
Apr,77,87.0,20,60
May,132,,52,70
Jun,205,60.0,55,80


### Select columns with any NaNs: df.isnull( ).any( )

In [50]:
sales.loc[:, sales.isnull().any()]

Unnamed: 0_level_0,salt
month,Unnamed: 1_level_1
Jan,12.0
Feb,50.0
Mar,89.0
Apr,87.0
May,
Jun,60.0


### Select columns without NaN values: df.notnull( ).all( )

In [51]:
sales.loc[:, sales.notnull().all()]

Unnamed: 0_level_0,eggs,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1
Jan,47,17
Feb,110,31
Mar,221,72
Apr,77,20
May,132,52
Jun,205,55


### Remove rows with missing data: df.dropna( )
- (how='any'): drop every row that contains at least one NaN
- (how='all'): drop only the rows in which all values are NaN

In [54]:
sales.dropna(how='any')

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
Jun,205,60.0,55


### Filtering a column based on another

In [60]:
sales.spam[sales.salt > 55]

month
Mar    72
Apr    20
Jun    55
Name: spam, dtype: int64

### Add 3 to the `eggs` values in the rows that fit the condition

In [65]:
# Use one .loc[row_mask, column] assignment instead of the chained
# `sales.eggs[mask] += 3`: chained indexing may write to a temporary copy
# (SettingWithCopyWarning, and a silent no-op under pandas Copy-on-Write).
# A NaN in `salt` compares False, so rows with missing salt are left unchanged.
# NOTE: this mutates `sales` in place, so re-running the cell adds 3 again.
sales.loc[sales.salt > 55, 'eggs'] += 3
sales

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,246,89.0,72
Apr,102,87.0,20
May,132,,52
Jun,230,60.0,55


# Practice 1: Thresholding data

In [67]:
election[election.turnout > 70]

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bucks,PA,319407,49.96697,48.801686,Obama,435606,73.324748,1.165284
Butler,PA,88924,31.920516,66.816607,Romney,122762,72.436096,34.896091
Chester,PA,248295,49.228539,49.650617,Romney,337822,73.498766,0.422079
Forest,PA,2308,38.734835,59.835355,Romney,3232,71.410891,21.10052
Franklin,PA,62802,30.110506,68.583803,Romney,87406,71.850903,38.473297
Montgomery,PA,401787,56.637223,42.286834,Obama,551105,72.905708,14.35039
Westmoreland,PA,168709,37.567646,61.306154,Romney,238006,70.884347,23.738508


In [70]:
close = election.margin < 1
close.head()

county
Adams        False
Allegheny    False
Armstrong    False
Beaver       False
Bedford      False
Name: margin, dtype: bool

### Set `winner` to NaN for the rows with a margin of less than 1

In [76]:
# `close` is the boolean Series (election.margin < 1) built in the previous cell;
# only the 'winner' value of the matching rows is overwritten with NaN.
election.loc[close, 'winner'] = np.nan

In [77]:
election.info()

<class 'pandas.core.frame.DataFrame'>
Index: 67 entries, Adams to York
Data columns (total 8 columns):
state      67 non-null object
total      67 non-null int64
Obama      67 non-null float64
Romney     67 non-null float64
winner     64 non-null object
voters     67 non-null int64
turnout    67 non-null float64
margin     67 non-null float64
dtypes: float64(4), int64(2), object(2)
memory usage: 7.2+ KB


In [78]:
election[election.margin < 1]

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Berks,PA,163253,48.939376,49.528646,,250356,65.208343,0.589269
Centre,PA,68801,48.948416,48.977486,,112949,60.913333,0.029069
Chester,PA,248295,49.228539,49.650617,,337822,73.498766,0.422079


In [80]:
tan = pd.read_csv('datasets/titanic.csv')
tan.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2.0,,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"


In [82]:
df = tan[['age', 'cabin']]
df.head()

Unnamed: 0,age,cabin
0,29.0,B5
1,0.92,C22 C26
2,2.0,C22 C26
3,30.0,C22 C26
4,25.0,C22 C26


df.dropna(how=...)
* (how='any'): drop every row that contains at least one NaN <br>
* (how='all'): drop only the rows in which all values are NaN

In [83]:
df.shape

(1309, 2)

### (how='any'): drop every row that contains at least one NaN

In [90]:
df.dropna(how='any').shape

(272, 2)

### (how='all'): drop only the rows in which all values are NaN

In [97]:
df.dropna(how='all').shape

(1069, 2)

### thresh=1000 with axis='columns': keep only the columns that have at least 1000 non-missing values

In [98]:
df.dropna(thresh=1000, axis='columns').info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 1 columns):
age    1046 non-null float64
dtypes: float64(1)
memory usage: 10.3 KB


# 2. Transforming DataFrames

In [99]:
sales.head()

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,246,89.0,72
Apr,102,87.0,20
May,132,,52


### Convert sales numbers into units of whole dozens (rounded down): 4 methods
- df.floordiv(12)
- np.floor_divide(df, 12)
- sales.apply(a), where `a(n)` is a named function returning n//12
- sales.apply(lambda n: n//12)

In [104]:
sales.floordiv(12)

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,3,1.0,1
Feb,9,4.0,2
Mar,20,7.0,6
Apr,8,7.0,1
May,11,,4
Jun,19,5.0,4


In [105]:
np.floor_divide(sales, 12)

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,3.0,1.0,1.0
Feb,9.0,4.0,2.0
Mar,20.0,7.0,6.0
Apr,8.0,7.0,1.0
May,11.0,,4.0
Jun,19.0,5.0,4.0


#### n//12 is floor division: the result is rounded down (integer for integer columns, float for float columns such as `salt`)

In [112]:
def a(n):
    """Return the number of whole dozens in ``n`` (floor division by 12).

    ``n`` may be a scalar or any object supporting ``//``; in this notebook
    it is called by ``DataFrame.apply``.
    """
    dozen = 12
    return n // dozen
sales.apply(a)

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,3,1.0,1
Feb,9,4.0,2
Mar,20,7.0,6
Apr,8,7.0,1
May,11,,4
Jun,19,5.0,4


In [114]:
sales.apply(lambda n: n//12)

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,3,1.0,1
Feb,9,4.0,2
Mar,20,7.0,6
Apr,8,7.0,1
May,11,,4
Jun,19,5.0,4


In [117]:
sales['dozens_of_eggs'] = sales.eggs.floordiv(12)

In [119]:
sales.head()

Unnamed: 0_level_0,eggs,salt,spam,dozens_of_eggs
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jan,47,12.0,17,3
Feb,110,50.0,31,9
Mar,246,89.0,72,20
Apr,102,87.0,20,8
May,132,,52,11


In [120]:
sales.index

Index(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun'], dtype='object', name='month')

### Index.str.upper( ): make the index labels all uppercase

In [121]:
sales.index = sales.index.str.upper()

In [123]:
sales

Unnamed: 0_level_0,eggs,salt,spam,dozens_of_eggs
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
JAN,47,12.0,17,3
FEB,110,50.0,31,9
MAR,246,89.0,72,20
APR,102,87.0,20,8
MAY,132,,52,11
JUN,230,60.0,55,19


In [126]:
sales.index = sales.index.map(str.lower)
sales.head()

Unnamed: 0_level_0,eggs,salt,spam,dozens_of_eggs
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
jan,47,12.0,17,3
feb,110,50.0,31,9
mar,246,89.0,72,20
apr,102,87.0,20,8
may,132,,52,11


## Create a new column by using existing values

In [138]:
sales['salty_eggs'] = sales['salt'] + sales['dozens_of_eggs']

In [139]:
sales

Unnamed: 0_level_0,eggs,salt,spam,dozens_of_eggs,salty_eggs
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
jan,47,12.0,17,3,15.0
feb,110,50.0,31,9,59.0
mar,246,89.0,72,20,109.0
apr,102,87.0,20,8,95.0
may,132,,52,11,
jun,230,60.0,55,19,79.0


# Practice 2: Using apply( ) to transform a column

In [142]:
weather = pd.read_csv('datasets/pittsburgh2013.csv')
weather.head()

Unnamed: 0,Date,Max TemperatureF,Mean TemperatureF,Min TemperatureF,Max Dew PointF,Mean Dew PointF,Min DewpointF,Max Humidity,Mean Humidity,Min Humidity,...,Max VisibilityMiles,Mean VisibilityMiles,Min VisibilityMiles,Max Wind SpeedMPH,Mean Wind SpeedMPH,Max Gust SpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
0,2013-1-1,32,28,21,30,27,16,100,89,77,...,10,6,2,10,8,,0.0,8,Snow,277
1,2013-1-2,25,21,17,14,12,10,77,67,55,...,10,10,10,14,5,,0.0,4,,272
2,2013-1-3,32,24,16,19,15,9,77,67,56,...,10,10,10,17,8,26.0,0.0,3,,229
3,2013-1-4,30,28,27,21,19,17,75,68,59,...,10,10,6,23,16,32.0,0.0,4,,250
4,2013-1-5,34,30,25,23,20,16,75,68,61,...,10,10,10,16,10,23.0,0.21,5,,221


In [143]:
weather.columns

Index(['Date', 'Max TemperatureF', 'Mean TemperatureF', 'Min TemperatureF',
       'Max Dew PointF', 'Mean Dew PointF', 'Min DewpointF', 'Max Humidity',
       'Mean Humidity', 'Min Humidity', 'Max Sea Level PressureIn',
       'Mean Sea Level PressureIn', 'Min Sea Level PressureIn',
       'Max VisibilityMiles', 'Mean VisibilityMiles', 'Min VisibilityMiles',
       'Max Wind SpeedMPH', 'Mean Wind SpeedMPH', 'Max Gust SpeedMPH',
       'PrecipitationIn', ' CloudCover', 'Events', 'WindDirDegrees'],
      dtype='object')

In [146]:
def celsius(F):
    """Convert a temperature from degrees Fahrenheit to degrees Celsius.

    ``F`` may be a scalar or any object supporting arithmetic; in this
    notebook it is called by ``DataFrame.apply``.
    """
    offset = F - 32
    return 5/9 * offset
# Convert the two Fahrenheit columns with celsius() and relabel them to
# reflect the new unit.
df_celsius = (
    weather[['Mean TemperatureF', 'Mean Dew PointF']]
    .apply(celsius)
    .rename(columns={'Mean TemperatureF': 'Mean TemperatureC',
                     'Mean Dew PointF': 'Mean Dew PointC'})
)
df_celsius.head()

Unnamed: 0,Mean TemperatureC,Mean Dew PointC
0,-2.222222,-2.777778
1,-6.111111,-11.111111
2,-4.444444,-9.444444
3,-2.222222,-7.222222
4,-1.111111,-6.666667


### Using Series.map() with a dictionary

In [149]:
# Translate each winner's name into a party colour. Series.map leaves NaN
# wherever the value is not a key of the dictionary.
redvsblue = {
    'Obama': 'blue',
    'Romney': 'red',
}
election['color'] = election.winner.map(redvsblue)
election.head()

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin,color
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Adams,PA,41973,35.482334,63.112001,Romney,61156,68.632677,27.629667,red
Allegheny,PA,614671,56.640219,42.18582,Obama,924351,66.497575,14.454399,blue
Armstrong,PA,28322,30.696985,67.901278,Romney,42147,67.19814,37.204293,red
Beaver,PA,80015,46.032619,52.63763,Romney,115157,69.483401,6.605012,red
Bedford,PA,21444,22.057452,76.98657,Romney,32189,66.619031,54.929118,red


In [152]:
zscore(election.turnout)

array([ 0.85373443,  0.43984633,  0.57565034,  1.01864668,  0.46339055,
        0.18992961, -1.62978766, -1.67811834,  1.76328918,  1.59102463,
        0.4115648 , -2.00690534, -0.41140691, -0.64265536,  1.79702245,
       -0.21292049, -0.36907863, -1.76358992, -0.63882099, -0.72673199,
        1.02421347,  0.83473876,  0.86101802, -0.58691702, -0.09392156,
       -2.26015319,  1.39228937,  1.47758532,  0.30389161, -0.71004763,
       -0.62292272, -0.22739249, -0.8586792 ,  1.11463935,  0.14408255,
        1.08675066, -0.25721482,  0.3426399 , -0.04498491, -0.09489986,
        0.71129079, -1.19644405, -0.06680477,  0.48399098, -1.89069251,
        1.68205856, -1.28403638, -0.79798793, -1.33971045,  0.52717328,
       -0.9241102 , -1.71852766,  0.34769042,  0.46386596,  0.99379745,
        0.21159213,  0.95701947,  0.83419812, -0.56442943,  0.65096061,
       -0.16243951, -1.4886646 , -0.18238803,  0.02514726,  1.29021923,
        0.14757638,  0.44085587])

In [153]:
type(zscore(election.turnout))

numpy.ndarray

### Assign turnout_zscore to a new column

In [156]:
# zscore() returns a plain NumPy array (no index), so the values are assigned
# to the new column by position, in the same row order as election.turnout.
election['turnout_zscore'] = zscore(election.turnout)

In [158]:
election.head()

Unnamed: 0_level_0,state,total,Obama,Romney,winner,voters,turnout,margin,color,turnout_zscore
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Adams,PA,41973,35.482334,63.112001,Romney,61156,68.632677,27.629667,red,0.853734
Allegheny,PA,614671,56.640219,42.18582,Obama,924351,66.497575,14.454399,blue,0.439846
Armstrong,PA,28322,30.696985,67.901278,Romney,42147,67.19814,37.204293,red,0.57565
Beaver,PA,80015,46.032619,52.63763,Romney,115157,69.483401,6.605012,red,1.018647
Bedford,PA,21444,22.057452,76.98657,Romney,32189,66.619031,54.929118,red,0.463391
