## 영어 이름 트렌드 분석

출생 연도에 따른 영어 이름 데이터를 사용해서 남자, 여자 구분없이 사용되는 공통 이름을 알아봅니다.<br>
또한, 최근 남자 이름으로 많이 사용하는 이름과 여자 이름으로 많이 이용하는 이름은 무엇인지 알아봅니다.

In [1]:
# 판다스 라이브러리를 불러옵니다. 
import pandas as pd

In [2]:
# Load the baby-name data from a CSV file (read with pd.read_csv, not an Excel reader).
# The path is relative to this notebook's location (./): data/babyNamesUS.csv.
file = './data/babyNamesUS.csv'
raw = pd.read_csv(file)

head()는 DataFrame 형태의 자료형에 사용이 가능하며, () 안에 숫자가 없을시에는 상위 5개를 출력하고, 숫자를 지정해주면 지정해준만큼 출력합니다.

In [3]:
# Preview the first rows to check the column layout.
raw.head()

Unnamed: 0,StateCode,Sex,YearOfBirth,Name,Number
0,AK,F,1910,Mary,14
1,AK,F,1910,Annie,12
2,AK,F,1910,Anna,10
3,AK,F,1910,Margaret,8
4,AK,F,1910,Helen,7


info를 사용해서 각 컬럼의 타입과 null값을 확인할 수 있습니다.

In [4]:
# Print each column's dtype and non-null count.
raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 5 columns):
 #   Column       Non-Null Count    Dtype 
---  ------       --------------    ----- 
 0   StateCode    1048575 non-null  object
 1   Sex          1048575 non-null  object
 2   YearOfBirth  1048575 non-null  int64 
 3   Name         1048575 non-null  object
 4   Number       1048575 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 40.0+ MB


## Q) 남자 여자 구분없이 사용되는 공통 이름은?  

남성, 여성별 등록된 이름 횟수를 정리하겠습니다. 

피벗 테이블을 이용해 이름/성별에 따른 등록 횟수를 정리합니다.

https://pandas.pydata.org/docs/reference/api/pandas.pivot_table.html

In [5]:
# Total registrations per name, split by sex (rows: Name, columns: Sex).
# Name/sex combinations that never occur come out as NaN.
name_df = pd.pivot_table(
    raw,
    values='Number',
    index='Name',
    columns='Sex',
    aggfunc='sum',
)
name_df

Sex,F,M
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Aadan,,18.0
Aaden,,855.0
Aadhav,,14.0
Aadhya,188.0,
Aadi,,116.0
...,...,...
Zylah,36.0,
Zyler,,38.0
Zyon,6.0,91.0
Zyra,23.0,


In [6]:
# Replace missing values (name/sex combinations with no registrations) with 0.
name_df = name_df.fillna(0)
name_df

Sex,F,M
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Aadan,0.0,18.0
Aaden,0.0,855.0
Aadhav,0.0,14.0
Aadhya,188.0,0.0
Aadi,0.0,116.0
...,...,...
Zylah,36.0,0.0
Zyler,0.0,38.0
Zyon,6.0,91.0
Zyra,23.0,0.0


In [7]:
# The counts are stored as floats at this point; cast them to integers.
name_df = name_df.astype(int)
name_df.head()

Sex,F,M
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Aadan,0,18
Aaden,0,855
Aadhav,0,14
Aadhya,188,0
Aadi,0,116


#### 남자/여자 비율 차이가 적을수록 --> 성별 구분 없는 이름이라고 가정

In [8]:
## Total registrations per name across both sexes.
name_df['Sum'] = name_df['M'].add(name_df['F'])
name_df.head()

Sex,F,M,Sum
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aadan,0,18,18
Aaden,0,855,855
Aadhav,0,14,14
Aadhya,188,0,188
Aadi,0,116,116


In [9]:
# Share of each name's registrations that are female.
name_df['F_ratio'] = name_df['F'].div(name_df['Sum'])
name_df['F_ratio']

Name
Aadan     0.000000
Aaden     0.000000
Aadhav    0.000000
Aadhya    1.000000
Aadi      0.000000
            ...   
Zylah     1.000000
Zyler     0.000000
Zyon      0.061856
Zyra      1.000000
Zyrah     1.000000
Name: F_ratio, Length: 20815, dtype: float64

In [10]:
# Share of each name's registrations that are male.
name_df['M_ratio'] = name_df['M'].div(name_df['Sum'])
name_df['M_ratio']

Name
Aadan     1.000000
Aaden     1.000000
Aadhav    1.000000
Aadhya    0.000000
Aadi      1.000000
            ...   
Zylah     0.000000
Zyler     1.000000
Zyon      0.938144
Zyra      0.000000
Zyrah     0.000000
Name: M_ratio, Length: 20815, dtype: float64

In [11]:
# Absolute difference between the female and male shares;
# a small gap means the name is used about equally for both sexes.
ratio_diff = name_df['F_ratio'] - name_df['M_ratio']
name_df['M_F_Gap'] = ratio_diff.abs()
name_df.head()

Sex,F,M,Sum,F_ratio,M_ratio,M_F_Gap
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aadan,0,18,18,0.0,1.0,1.0
Aaden,0,855,855,0.0,1.0,1.0
Aadhav,0,14,14,0.0,1.0,1.0
Aadhya,188,0,188,1.0,0.0,1.0
Aadi,0,116,116,0.0,1.0,1.0


In [12]:
# Order names from most to least registered overall.
name_df = name_df.sort_values('Sum', ascending=False)
name_df.head(20)

Sex,F,M,Sum,F_ratio,M_ratio,M_F_Gap
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Michael,4133,725757,729890,0.005662,0.994338,0.988675
James,3050,693271,696321,0.00438,0.99562,0.99124
Robert,2469,674934,677403,0.003645,0.996355,0.99271
John,2398,670893,673291,0.003562,0.996438,0.992877
David,2003,615943,617946,0.003241,0.996759,0.993517
Mary,519443,1319,520762,0.997467,0.002533,0.994934
William,1402,517796,519198,0.0027,0.9973,0.994599
Richard,1045,382479,383524,0.002725,0.997275,0.994551
Christopher,1701,378022,379723,0.00448,0.99552,0.991041
Daniel,1481,373549,375030,0.003949,0.996051,0.992102


In [13]:
# Keep names whose male/female share gap is below 0.1.
cond = name_df['M_F_Gap'].lt(0.1)
name_df.loc[cond].head(10)

Sex,F,M,Sum,F_ratio,M_ratio,M_F_Gap
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Jessie,25842,21259,47101,0.548651,0.451349,0.097302
Riley,15539,14929,30468,0.510011,0.489989,0.020021
Emerson,2341,2471,4812,0.486492,0.513508,0.027016
Justice,2083,2461,4544,0.458407,0.541593,0.083187
Kris,2100,2055,4155,0.505415,0.494585,0.01083
Carey,1969,1841,3810,0.516798,0.483202,0.033596
Amari,1694,2057,3751,0.451613,0.548387,0.096774
Stevie,1795,1649,3444,0.521196,0.478804,0.042393
Merle,1623,1612,3235,0.5017,0.4983,0.0034
Jaylin,1174,1021,2195,0.534852,0.465148,0.069704


In [14]:
### The most-used names shared by both sexes (first 10 rows matching the gap filter).
name_df[cond].head(10).index

Index(['Jessie', 'Riley', 'Emerson', 'Justice', 'Kris', 'Carey', 'Amari',
       'Stevie', 'Merle', 'Jaylin'],
      dtype='object', name='Name')

# James, Mary 가 가장 대표적인 미국 이름???   

## Q) 가장 대표적인 미국이름은??  

- 최근 트렌드에 따른

In [15]:
# Use unique() to inspect the distinct values in raw['YearOfBirth'].
raw['YearOfBirth'].unique()

array([1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920,
       1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931,
       1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942,
       1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953,
       1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964,
       1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975,
       1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986,
       1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
       1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
       2009, 2010, 2011, 2012, 2013, 2014, 2015], dtype=int64)

#### 세대 기준으로 그룹 만들기
한 세대 나누는 기준 30년 :  2020년 기준 30년씩 구분
- 1930년 이전 (1930년 포함)
- 1960년 이전 (1931년 ~ 1960년)
- 1990년 이전 (1961년 ~ 1990년)
- 2020년 이전 (1991년 이후)

In [16]:
# Map every birth year to its generation bucket in one vectorized pass
# instead of looping over each row in Python.
# Bin edges are right-inclusive, matching the original comparisons:
#   year <= 1930        -> '1930년이전'
#   1930 < year <= 1960 -> '1960년이전'
#   1960 < year <= 1990 -> '1990년이전'
#   year > 1990         -> '2020년이전'
year_bins = [float('-inf'), 1930, 1960, 1990, float('inf')]
year_labels = ['1930년이전', '1960년이전', '1990년이전', '2020년이전']

# Convert the categorical result back to a plain list of strings so the
# value keeps the same type as before (a list with one label per row).
year_class_list = (
    pd.cut(raw['YearOfBirth'], bins=year_bins, labels=year_labels, right=True)
    .astype(str)
    .tolist()
)

In [17]:
# Add the list of generation labels as a new 'year_class' column.
raw['year_class'] = year_class_list
raw.head()

Unnamed: 0,StateCode,Sex,YearOfBirth,Name,Number,year_class
0,AK,F,1910,Mary,14,1930년이전
1,AK,F,1910,Annie,12,1930년이전
2,AK,F,1910,Anna,10,1930년이전
3,AK,F,1910,Margaret,8,1930년이전
4,AK,F,1910,Helen,7,1930년이전


In [18]:
# Total registrations per (Name, Sex) pair, with one column per generation bucket.
name_period = pd.pivot_table(
    raw,
    values='Number',
    index=['Name', 'Sex'],
    columns='year_class',
    aggfunc='sum',
)
name_period

Unnamed: 0_level_0,year_class,1930년이전,1960년이전,1990년이전,2020년이전
Name,Sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aadan,M,,,,18.0
Aaden,M,,,,855.0
Aadhav,M,,,,14.0
Aadhya,F,,,,188.0
Aadi,M,,,,116.0
...,...,...,...,...,...
Zyler,M,,,,38.0
Zyon,F,,,,6.0
Zyon,M,,,,91.0
Zyra,F,,,,23.0


In [19]:
# Fill missing generation counts with 0, then cast the counts to integers.
name_period = name_period.fillna(0).astype(int)
name_period.head()

Unnamed: 0_level_0,year_class,1930년이전,1960년이전,1990년이전,2020년이전
Name,Sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aadan,M,0,0,0,18
Aaden,M,0,0,0,855
Aadhav,M,0,0,0,14
Aadhya,F,0,0,0,188
Aadi,M,0,0,0,116


#### 전체 컬럼 합계 계산하기
- 모든 컬럼을 하나씩 더하기 : df['컬럼1'] + df['컬럼2'] + ... + df['컬럼n']  
- sum() 활용하기: df.`sum(axis = 1)`
    - 참고) df.sum() 을 활용하면, 기본값으로 axis = 0 으로 지정되며, row 별 합계가 아닌 컬럼별 합계(각 컬럼의 모든 row 를 더한 값)가 계산됩니다. row 별 합계가 필요하면 axis = 1 을 지정합니다.

In [20]:
# Inefficient: adds the four generation columns together by hand.
name_period['1930년이전']+name_period['1960년이전']+name_period['1990년이전']+name_period['2020년이전']

Name    Sex
Aadan   M       18
Aaden   M      855
Aadhav  M       14
Aadhya  F      188
Aadi    M      116
              ... 
Zyler   M       38
Zyon    F        6
        M       91
Zyra    F       23
Zyrah   F        5
Length: 22798, dtype: int32

In [21]:
# With no axis argument, sum() adds up the rows within each column,
# giving one total per generation column.
name_period.sum()

year_class
1930년이전     3966400
1960년이전    12880074
1990년이전    19431544
2020년이전    18188503
dtype: int64

In [22]:
# sum(axis = 1) adds across the columns of each row, so 'sum' holds
# every (Name, Sex) pair's total over all generation columns.
name_period['sum'] = name_period.sum(axis = 1)
name_period.head()

Unnamed: 0_level_0,year_class,1930년이전,1960년이전,1990년이전,2020년이전,sum
Name,Sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aadan,M,0,0,0,18,18
Aaden,M,0,0,0,855,855
Aadhav,M,0,0,0,14,14
Aadhya,F,0,0,0,188,188
Aadi,M,0,0,0,116,116


모든 컬럼을 row 별 합계('sum' 컬럼)로 나누어, 세대별 등록 비율을 계산합니다.

계산된 값은 기존컬럼 뒤에 "비율" 이름을 추가한 신규컬럼에 저장합니다. 

In [23]:
# For every existing column, add a "<column>비율" column holding that
# column's share of the row total.
# NOTE(review): the loop also covers 'sum' itself, so a 'sum비율' column
# (sum / sum) is created as well; confirm whether that column is wanted.
row_total = name_period['sum']
for source_col in list(name_period.columns):
    name_period[f"{source_col}비율"] = name_period[source_col] / row_total

name_period.head()

Unnamed: 0_level_0,year_class,1930년이전,1960년이전,1990년이전,2020년이전,sum,1930년이전비율,1960년이전비율,1990년이전비율,2020년이전비율,sum비율
Name,Sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Aadan,M,0,0,0,18,18,0.0,0.0,0.0,1.0,1.0
Aaden,M,0,0,0,855,855,0.0,0.0,0.0,1.0,1.0
Aadhav,M,0,0,0,14,14,0.0,0.0,0.0,1.0,1.0
Aadhya,F,0,0,0,188,188,0.0,0.0,0.0,1.0,1.0
Aadi,M,0,0,0,116,116,0.0,0.0,0.0,1.0,1.0


In [24]:
# Sort descending by total count, then by the '2020년이전' share,
# then by the '1990년이전' share.
sort_keys = ['sum', '2020년이전비율', '1990년이전비율']
name_period = name_period.sort_values(by=sort_keys, ascending=False)
name_period

Unnamed: 0_level_0,year_class,1930년이전,1960년이전,1990년이전,2020년이전,sum,1930년이전비율,1960년이전비율,1990년이전비율,2020년이전비율,sum비율
Name,Sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Michael,M,4990,198074,377295,145398,725757,0.006876,0.272921,0.519864,0.200340,1.0
James,M,97838,288091,225243,82099,693271,0.141125,0.415553,0.324899,0.118423,1.0
Robert,M,87070,292338,231058,64468,674934,0.129005,0.433136,0.342342,0.095517,1.0
John,M,98536,268873,227108,76376,670893,0.146873,0.400769,0.338516,0.113842,1.0
David,M,16463,203033,278429,118018,615943,0.026728,0.329630,0.452037,0.191605,1.0
...,...,...,...,...,...,...,...,...,...,...,...
Yoshiro,M,5,0,0,0,5,1.000000,0.000000,0.000000,0.000000,1.0
Ysabel,M,5,0,0,0,5,1.000000,0.000000,0.000000,0.000000,1.0
Yvonnie,F,0,5,0,0,5,0.000000,1.000000,0.000000,0.000000,1.0
Zebedee,M,0,5,0,0,5,0.000000,1.000000,0.000000,0.000000,1.0


In [25]:
# The frame is indexed by a (Name, Sex) MultiIndex.
name_period.index

MultiIndex([(    'Michael', 'M'),
            (      'James', 'M'),
            (     'Robert', 'M'),
            (       'John', 'M'),
            (      'David', 'M'),
            (       'Mary', 'F'),
            (    'William', 'M'),
            (    'Richard', 'M'),
            ('Christopher', 'M'),
            (     'Daniel', 'M'),
            ...
            (   'Wylodene', 'F'),
            (     'Yayeko', 'F'),
            (      'Yayoi', 'F'),
            (      'Yoshi', 'F'),
            (    'Yoshimi', 'F'),
            (    'Yoshiro', 'M'),
            (     'Ysabel', 'M'),
            (    'Yvonnie', 'F'),
            (    'Zebedee', 'M'),
            (    'Zygmunt', 'M')],
           names=['Name', 'Sex'], length=22798)

인덱스가 여러 레벨로 되어있을 경우, 인덱스를 활용해 컨트롤 하는 것은 복잡하기때문에 reset_index()를 활용하여 인덱스로 설정된 이름과 성별을 컬럼으로 변경합니다. 

In [26]:
# Turn the Name and Sex index levels into ordinary columns.
name_period = name_period.reset_index()
name_period

year_class,Name,Sex,1930년이전,1960년이전,1990년이전,2020년이전,sum,1930년이전비율,1960년이전비율,1990년이전비율,2020년이전비율,sum비율
0,Michael,M,4990,198074,377295,145398,725757,0.006876,0.272921,0.519864,0.200340,1.0
1,James,M,97838,288091,225243,82099,693271,0.141125,0.415553,0.324899,0.118423,1.0
2,Robert,M,87070,292338,231058,64468,674934,0.129005,0.433136,0.342342,0.095517,1.0
3,John,M,98536,268873,227108,76376,670893,0.146873,0.400769,0.338516,0.113842,1.0
4,David,M,16463,203033,278429,118018,615943,0.026728,0.329630,0.452037,0.191605,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
22793,Yoshiro,M,5,0,0,0,5,1.000000,0.000000,0.000000,0.000000,1.0
22794,Ysabel,M,5,0,0,0,5,1.000000,0.000000,0.000000,0.000000,1.0
22795,Yvonnie,F,0,5,0,0,5,0.000000,1.000000,0.000000,0.000000,1.0
22796,Zebedee,M,0,5,0,0,5,0.000000,1.000000,0.000000,0.000000,1.0


In [27]:
# Look at male names only.
# For the top names James, Robert and John, more than 40% of registrations
# fall in the '1960년이전' bucket, so they do not look like current-trend names.
cond = name_period['Sex'].eq('M')
name_period.loc[cond].head(10)

year_class,Name,Sex,1930년이전,1960년이전,1990년이전,2020년이전,sum,1930년이전비율,1960년이전비율,1990년이전비율,2020년이전비율,sum비율
0,Michael,M,4990,198074,377295,145398,725757,0.006876,0.272921,0.519864,0.20034,1.0
1,James,M,97838,288091,225243,82099,693271,0.141125,0.415553,0.324899,0.118423,1.0
2,Robert,M,87070,292338,231058,64468,674934,0.129005,0.433136,0.342342,0.095517,1.0
3,John,M,98536,268873,227108,76376,670893,0.146873,0.400769,0.338516,0.113842,1.0
4,David,M,16463,203033,278429,118018,615943,0.026728,0.32963,0.452037,0.191605,1.0
6,William,M,89173,200843,141872,85908,517796,0.172216,0.387881,0.273992,0.165911,1.0
7,Richard,M,30680,185139,131367,35293,382479,0.080214,0.48405,0.343462,0.092274,1.0
8,Christopher,M,335,20961,233318,123408,378022,0.000886,0.055449,0.617207,0.326457,1.0
9,Daniel,M,7133,59581,166941,139894,373549,0.019095,0.1595,0.446905,0.3745,1.0
10,Joseph,M,34908,75603,130341,100905,341757,0.102143,0.221219,0.381385,0.295254,1.0


In [28]:
# Keep only names whose '2020년이전' share is above 30%.
## Male names are shown here.

cond_sex = name_period['Sex'].eq('M')
cond_age = name_period['2020년이전비율'].gt(0.3)
cond = cond_age & cond_sex
name_period.loc[cond].head(5)

year_class,Name,Sex,1930년이전,1960년이전,1990년이전,2020년이전,sum,1930년이전비율,1960년이전비율,1990년이전비율,2020년이전비율,sum비율
8,Christopher,M,335,20961,233318,123408,378022,0.000886,0.055449,0.617207,0.326457,1.0
9,Daniel,M,7133,59581,166941,139894,373549,0.019095,0.1595,0.446905,0.3745,1.0
14,Matthew,M,1160,8822,148707,121522,280211,0.00414,0.031483,0.530697,0.43368,1.0
15,Anthony,M,7132,36965,114441,121379,279917,0.025479,0.132057,0.408839,0.433625,1.0
20,Andrew,M,7369,18639,94219,117022,237249,0.03106,0.078563,0.397131,0.493245,1.0


In [29]:
# Now look at female names.
## More than 50% of Mary registrations fall in the '1960년이전' bucket,
## so the name reads as belonging to an older generation.
## Among the top-ranked names, Jessica, Sarah and Ashley stand out with a
## high '2020년이전' share.
cond = name_period['Sex'].eq('F')
name_period.loc[cond].head(10)

year_class,Name,Sex,1930년이전,1960년이전,1990년이전,2020년이전,sum,1930년이전비율,1960년이전비율,1990년이전비율,2020년이전비율,sum비율
5,Mary,F,143702,260110,92833,22798,519443,0.276646,0.500748,0.178716,0.043889,1.0
11,Jennifer,F,0,12279,253345,59759,325383,0.0,0.037737,0.778606,0.183657,1.0
16,Elizabeth,F,27705,60658,112556,72495,273414,0.10133,0.221854,0.411669,0.265147,1.0
17,Patricia,F,12951,165330,69739,8988,257008,0.050391,0.643287,0.27135,0.034972,1.0
18,Linda,F,813,191589,51390,6207,249999,0.003252,0.766359,0.205561,0.024828,1.0
19,Jessica,F,0,1380,153315,92631,247326,0.0,0.00558,0.61989,0.37453,1.0
23,Barbara,F,24206,154488,35465,3030,217189,0.111451,0.711307,0.163291,0.013951,1.0
28,Sarah,F,11765,20330,93470,68456,194021,0.060638,0.104782,0.481752,0.352828,1.0
30,Michelle,F,0,12895,136478,39668,189041,0.0,0.068213,0.721949,0.209838,1.0
31,Ashley,F,0,0,89243,97123,186366,0.0,0.0,0.478859,0.521141,1.0


In [30]:
# Keep only names whose '2020년이전' share is above 30%.
## Female names are shown here.

cond_sex = name_period['Sex'].eq('F')
cond_age = name_period['2020년이전비율'].gt(0.3)
cond = cond_age & cond_sex
name_period.loc[cond].head(5)

year_class,Name,Sex,1930년이전,1960년이전,1990년이전,2020년이전,sum,1930년이전비율,1960년이전비율,1990년이전비율,2020년이전비율,sum비율
19,Jessica,F,0,1380,153315,92631,247326,0.0,0.00558,0.61989,0.37453,1.0
28,Sarah,F,11765,20330,93470,68456,194021,0.060638,0.104782,0.481752,0.352828,1.0
31,Ashley,F,0,0,89243,97123,186366,0.0,0.0,0.478859,0.521141,1.0
37,Stephanie,F,252,11271,111214,55909,178646,0.001411,0.063091,0.622538,0.31296,1.0
51,Emily,F,3816,6191,38195,105767,153969,0.024784,0.040209,0.248069,0.686937,1.0
