# Pandas is a data analysis library built on top of the NumPy library.


In [1]:
import pandas as pd
import numpy as np
a=pd.Series(np.arange(10,101,10),index=np.arange(1,11))

In [2]:
a

1      10
2      20
3      30
4      40
5      50
6      60
7      70
8      80
9      90
10    100
dtype: int32

In [3]:
a.index=pd.RangeIndex(11,21)

In [4]:
a.index

RangeIndex(start=11, stop=21, step=1)

In [5]:
a.head()

11    10
12    20
13    30
14    40
15    50
dtype: int32

In [6]:
a.tail()

16     60
17     70
18     80
19     90
20    100
dtype: int32

# Accessing data from series object

Multiple options are available to retrieve data from a Series object:
1. index based 
2. slice based
3. advanced indexing
4. condition based selection


In [7]:
a=pd.Series(np.arange(10,101,10),index=np.arange(11,21).astype('str'))

In [8]:
a.head(3)

11    10
12    20
13    30
dtype: int32

In [9]:
# The index labels are strings, so position-based access goes through .iloc;
# plain a[0] relies on the deprecated integer-position fallback of Series.__getitem__.
a.iloc[0]

10

In [10]:
a['11':'16']

11    10
12    20
13    30
14    40
15    50
16    60
dtype: int32

In [11]:
a[:3]

11    10
12    20
13    30
dtype: int32

In [12]:
# Select by position explicitly: a list of integers passed to a[...] on a
# string-labelled Series relies on the deprecated positional fallback.
a.iloc[[1,5]]

12    20
16    60
dtype: int32

In [13]:
a[-3:-2]

18    80
dtype: int32

In [14]:
a['18']

80

In [15]:
b=a.get(14,"Empty")

In [16]:
b

'Empty'

In [17]:
a.iloc[6]

70

In [18]:
a.iloc[[1,3,6,7]]

12    20
14    40
17    70
18    80
dtype: int32

In [19]:
a.iloc[2:4]

13    30
14    40
dtype: int32

In [20]:
a.iloc[:3]

11    10
12    20
13    30
dtype: int32

In [21]:
a.iloc[-1]

100

In [22]:
a.loc['12']

20

In [23]:
a.loc[['11','12','14']]

11    10
12    20
14    40
dtype: int32

In [24]:
a.loc['11':'13':2]

11    10
13    30
dtype: int32

In [25]:
#condition based selection....

a.loc[a>38]


14     40
15     50
16     60
17     70
18     80
19     90
20    100
dtype: int32

In [26]:
a.get(a<36)

11    10
12    20
13    30
dtype: int32

In [27]:
def callble(s):
    """Build a boolean mask flagging the elements of ``s`` divisible by both 3 and 4.

    Parameters
    ----------
    s : iterable of numbers
        Values to test (for example a pandas Series).

    Returns
    -------
    list of bool
        One entry per element of ``s``; ``True`` where the element is a
        multiple of both 3 and 4, ``False`` otherwise.
    """
    # The comparison already yields a boolean, so no ``True if ... else False``
    # is needed; bool() keeps each entry a plain Python bool even for NumPy scalars.
    return [bool(x % 3 == 0 and x % 4 == 0) for x in s]

In [28]:
a[callble]

16    60
dtype: int32

In [29]:
a.iloc[callble]

16    60
dtype: int32

In [30]:
a.loc[callble]

16    60
dtype: int32

In [31]:
a[lambda s:[True if x%3==0 and x%4==0 else False for x in s]]

16    60
dtype: int32

In [32]:
a.values

array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

In [33]:
a.index

Index(['11', '12', '13', '14', '15', '16', '17', '18', '19', '20'], dtype='object')

In [34]:
a.dtype

dtype('int32')

In [35]:
a.size

10

In [36]:
a.shape

(10,)

In [37]:
a.ndim

1

In [38]:
s=pd.Series(data=[1,2,4,7,3,2,1,np.nan])

In [39]:
s.unique()

array([ 1.,  2.,  4.,  7.,  3., nan])

In [40]:
s.nunique()

5

In [41]:
# Series.is_monotonic was deprecated and later removed from pandas;
# is_monotonic_increasing is the equivalent check.
s.is_monotonic_increasing

False

In [42]:
s.is_monotonic_increasing

False

In [43]:
s.is_monotonic_decreasing

False

In [44]:
s.hasnans

True

In [45]:
s.values

array([ 1.,  2.,  4.,  7.,  3.,  2.,  1., nan])

In [46]:
s.index

RangeIndex(start=0, stop=8, step=1)

In [47]:
s.size

8

In [48]:
s.shape

(8,)

In [49]:
s.ndim

1

In [50]:
s.dtype

dtype('float64')

In [51]:
s.is_unique

False

In [52]:
s.is_monotonic_increasing

False

In [53]:
s.hasnans

True

In [54]:
s.value_counts()

1.0    2
2.0    2
4.0    1
7.0    1
3.0    1
dtype: int64

In [55]:
sorted(s)

[1.0, 1.0, 2.0, 2.0, 3.0, 4.0, 7.0, nan]

In [56]:
pd.read_csv

<function pandas.io.parsers.readers.read_csv(filepath_or_buffer: 'FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str]', sep=<no_default>, delimiter=None, header='infer', names=<no_default>, index_col=None, usecols=None, squeeze=None, prefix=<no_default>, mangle_dupe_cols=True, dtype: 'DtypeArg | None' = None, engine: 'CSVEngine | None' = None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=None, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, cache_dates=True, iterator=False, chunksize=None, compression: 'CompressionOptions' = 'infer', thousands=None, decimal: 'str' = '.', lineterminator=None, quotechar='"', quoting=0, doublequote=True, escapechar=None, comment=None, encoding=None, encoding_errors: 'str | None' = 'strict', dialect=None, error_bad_lines=None, warn_bad_lines=Non

In [57]:
s.count()

7

In [58]:
s.size

8

In [59]:
s.hasnans

True

# Data analysis using pandas functions

In [60]:
s[s.isna()] #we can use isnull() func also

7   NaN
dtype: float64

In [61]:
s[s.notna()].size # number of non-missing values; notnull() can be used in place of notna()

7

In [62]:
b=s.dropna(inplace=False)

In [63]:
b

0    1.0
1    2.0
2    4.0
3    7.0
4    3.0
5    2.0
6    1.0
dtype: float64

In [64]:
# np.mean(s) evaluates to about 2.857 here (see the np.mean(s) cell below); int() truncates
# it to 2, which is the value written into the missing slot. inplace=False leaves s unchanged.
s.fillna(int(np.mean(s)),inplace=False)

0    1.0
1    2.0
2    4.0
3    7.0
4    3.0
5    2.0
6    1.0
7    2.0
dtype: float64

In [65]:
s.tail()

3    7.0
4    3.0
5    2.0
6    1.0
7    NaN
dtype: float64

In [66]:
s.sum()

20.0

In [67]:
s.mean()

2.857142857142857

In [68]:
np.mean(s)

2.857142857142857

In [69]:
s.var(skipna=True)

4.476190476190475

In [70]:
s.median()

2.0

In [71]:
s.std()

2.115700942049815

In [72]:
np.sqrt(s.var())

2.115700942049815

In [73]:
s.mode()

0    1.0
1    2.0
dtype: float64

In [74]:
s.value_counts()

1.0    2
2.0    2
4.0    1
7.0    1
3.0    1
dtype: int64

In [75]:
s.describe()

count    7.000000
mean     2.857143
std      2.115701
min      1.000000
25%      1.500000
50%      2.000000
75%      3.500000
max      7.000000
dtype: float64

In [76]:
s.idxmax() #returns index label of maximum element

3

In [77]:
s.idxmin() #returns index label of minimum element

0

In [78]:
s.nlargest(5)

3    7.0
2    4.0
4    3.0
1    2.0
5    2.0
dtype: float64

In [79]:
s.nsmallest(3)

0    1.0
6    1.0
1    2.0
dtype: float64

In [80]:
s.sort_values(ascending=True,na_position='first')

7    NaN
0    1.0
6    1.0
1    2.0
5    2.0
4    3.0
2    4.0
3    7.0
dtype: float64

In [81]:
s.sort_index(ascending=False)

7    NaN
6    1.0
5    2.0
4    3.0
3    7.0
2    4.0
1    2.0
0    1.0
dtype: float64

In [82]:
s.sum()

20.0

In [83]:
s.cumsum()

0     1.0
1     3.0
2     7.0
3    14.0
4    17.0
5    19.0
6    20.0
7     NaN
dtype: float64

In [84]:
s.prod()

336.0

In [85]:
s.cumprod()

0      1.0
1      2.0
2      8.0
3     56.0
4    168.0
5    336.0
6    336.0
7      NaN
dtype: float64

In [86]:
s.cummax()

0    1.0
1    2.0
2    4.0
3    7.0
4    7.0
5    7.0
6    7.0
7    NaN
dtype: float64

In [87]:
s.cummin()

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
5    1.0
6    1.0
7    NaN
dtype: float64

In [88]:
a=pd.Series(data=np.array([1,3,6,8,9]),index=['aa','ab','ac','da','ea'])

In [89]:
a.diff(-2)

aa   -5.0
ab   -5.0
ac   -3.0
da    NaN
ea    NaN
dtype: float64

In [90]:
a.filter(regex='^a') # keeps the entries whose index label matches the regex; to filter on the values, use a boolean mask instead

aa    1
ab    3
ac    6
dtype: int32

In [91]:
np.where(a%2==0,'even','odd')

array(['odd', 'odd', 'even', 'even', 'odd'], dtype='<U4')

In [94]:
# NOTE(review): `call` is defined in a later cell (In [93]); on a fresh kernel this
# cell raises NameError unless that definition has been run first.
a.mask(call,'divided by 2')

aa               1
ab               3
ac    divided by 2
da    divided by 2
ea               9
dtype: object

In [93]:
def call(x):
    """Build a boolean mask flagging the even elements of ``x``.

    Parameters
    ----------
    x : iterable of numbers
        Values to test (for example a pandas Series).

    Returns
    -------
    list of bool
        One entry per element of ``x``; ``True`` where the element is even.
    """
    # The comparison is already a boolean, so the ``True if ... else False``
    # wrapper is dropped; bool() keeps each entry a plain Python bool.
    return [bool(i % 2 == 0) for i in x]

In [95]:
a.mask(call,'divided by 2')

aa               1
ab               3
ac    divided by 2
da    divided by 2
ea               9
dtype: object

In [None]:
a


In [96]:
a.index=np.arange(0,5)

In [None]:
a

In [97]:
a.update(pd.Series(np.array([1,22,3]),index=[2,4,9]))

In [None]:
a

In [98]:
a.update(np.array([1,2,3,4,5,6,7,8,9,0]))

In [99]:
a

0    1
1    2
2    3
3    4
4    5
dtype: int32

In [100]:
def increase(x,y,z):
    """Return ``x`` with ``y`` and then ``z`` added to it, in that order."""
    partial = x + y
    return partial + z


In [101]:
a.apply(increase,y=30,z=10)

0    41
1    42
2    43
3    44
4    45
dtype: int64

In [102]:
a.apply(lambda x:x**2)

0     1
1     4
2     9
3    16
4    25
dtype: int64

In [103]:
a.apply(np.square)

0     1
1     4
2     9
3    16
4    25
dtype: int32

In [104]:
a.map(lambda x:x+2)

0    3
1    4
2    5
3    6
4    7
dtype: int64

# Pandas Dataframe

In [105]:
df=pd.read_csv("Customers.csv")

In [106]:
df.head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [107]:
df.columns

Index(['CustomerID', 'Genre', 'Age', 'Annual Income (k$)',
       'Spending Score (1-100)'],
      dtype='object')

In [108]:
df.index

RangeIndex(start=0, stop=200, step=1)

In [109]:
df.ndim

2

In [110]:
df.shape

(200, 5)

In [111]:
df.axes

[RangeIndex(start=0, stop=200, step=1),
 Index(['CustomerID', 'Genre', 'Age', 'Annual Income (k$)',
        'Spending Score (1-100)'],
       dtype='object')]

In [113]:
df.sample(3)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
95,96,Male,24,60,52
103,104,Male,26,62,55
80,81,Male,57,54,51


In [114]:
df.size

1000

In [116]:
df.dtypes

CustomerID                 int64
Genre                     object
Age                        int64
Annual Income (k$)         int64
Spending Score (1-100)     int64
dtype: object

In [117]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   CustomerID              200 non-null    int64 
 1   Genre                   200 non-null    object
 2   Age                     200 non-null    int64 
 3   Annual Income (k$)      200 non-null    int64 
 4   Spending Score (1-100)  200 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 7.9+ KB


In [118]:
df['Age'].describe()

count    200.000000
mean      38.850000
std       13.969007
min       18.000000
25%       28.750000
50%       36.000000
75%       49.000000
max       70.000000
Name: Age, dtype: float64

In [119]:
df.tail(3)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
197,198,Male,32,126,74
198,199,Male,32,137,18
199,200,Male,30,137,83


In [120]:
df.head(2)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81


In [127]:
df['Age'].head(3)

0    19
1    21
2    20
Name: Age, dtype: int64

In [129]:
df[['Age','Genre']].head(3)

Unnamed: 0,Age,Genre
0,19,Male
1,21,Male
2,20,Female


In [130]:
# Arithmetic operations on a DataFrame
# Arithmetic is applied element-wise to every column; because 'Genre' holds strings, operations such as addition are not meaningful on the whole frame, so select the numeric column(s) first.

In [141]:
(df['Age']+1000).head(5)

0    1019
1    1021
2    1020
3    1023
4    1031
Name: Age, dtype: int64

In [149]:
df["Age"].sub(10).head()

0     9
1    11
2    10
3    13
4    21
Name: Age, dtype: int64

In [150]:
df['newcolumn']=100

In [157]:
df.drop(columns=['newcolumn'],inplace=True)

In [158]:
df.head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [192]:
df.insert(1,'newcolumn',100)

In [161]:
df.head()

Unnamed: 0,CustomerID,newcolumn,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,100,Male,19,15,39
1,2,100,Male,21,15,81
2,3,100,Female,20,16,6
3,4,100,Female,23,16,77
4,5,100,Female,31,17,40


In [193]:
df.drop(columns=['newcolumn'],inplace=True)

In [166]:
df.isnull().sum()

CustomerID                0
Genre                     0
Age                       0
Annual Income (k$)        0
Spending Score (1-100)    0
dtype: int64

In [167]:
# dropna() drops the rows or columns that contain null values

In [202]:
df['newcolumn']=np.nan

In [184]:
df.isnull().sum()

CustomerID                  0
Genre                       0
Age                         0
Annual Income (k$)          0
Spending Score (1-100)      0
newcolumn                 200
dtype: int64

In [197]:
df.dropna(axis=0,how='any',inplace=False,subset=['Age','newcolumn']) #axis can be used to mention whether to delete the rows or columns....

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn


In [203]:
df.head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
0,1,Male,19,15,39,
1,2,Male,21,15,81,
2,3,Female,20,16,6,
3,4,Female,23,16,77,
4,5,Female,31,17,40,


In [201]:
df.drop(columns=['newcolumn'])

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40
...,...,...,...,...,...
195,196,Female,35,120,79
196,197,Female,45,126,28
197,198,Male,32,126,74
198,199,Male,32,137,18


In [205]:
df.fillna(0)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
0,1,Male,19,15,39,0.0
1,2,Male,21,15,81,0.0
2,3,Female,20,16,6,0.0
3,4,Female,23,16,77,0.0
4,5,Female,31,17,40,0.0
...,...,...,...,...,...,...
195,196,Female,35,120,79,0.0
196,197,Female,45,126,28,0.0
197,198,Male,32,126,74,0.0
198,199,Male,32,137,18,0.0


In [210]:
df.sort_values(by=['Age','CustomerID'],ascending=[False,True])

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
60,61,Male,70,46,56,
70,71,Male,70,49,55,
57,58,Male,69,44,46,
67,68,Female,68,48,48,
90,91,Female,68,59,55,
...,...,...,...,...,...,...
162,163,Male,19,81,5,
33,34,Male,18,33,92,
65,66,Male,18,48,59,
91,92,Male,18,59,41,


In [212]:
df.sort_index(ascending=False)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
199,200,Male,30,137,83,
198,199,Male,32,137,18,
197,198,Male,32,126,74,
196,197,Female,45,126,28,
195,196,Female,35,120,79,
...,...,...,...,...,...,...
4,5,Female,31,17,40,
3,4,Female,23,16,77,
2,3,Female,20,16,6,
1,2,Male,21,15,81,


In [225]:
df[df['Genre']=='Male']

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
0,1,Male,19,15,39,
1,2,Male,21,15,81,
8,9,Male,64,19,3,
10,11,Male,67,19,14,
14,15,Male,37,20,13,
...,...,...,...,...,...,...
187,188,Male,28,101,68,
192,193,Male,33,113,8,
197,198,Male,32,126,74,
198,199,Male,32,137,18,


In [228]:
c=df['Genre']=='Male'
d=df['Age']>50


In [233]:
df[c & d].shape

(22, 6)

In [243]:
df[df['Age'].isin([30,40])].shape

(13, 6)

In [245]:
df[df['Age'].between(30,41)].shape

(69, 6)

In [247]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   CustomerID              200 non-null    int64  
 1   Genre                   200 non-null    object 
 2   Age                     200 non-null    int64  
 3   Annual Income (k$)      200 non-null    int64  
 4   Spending Score (1-100)  200 non-null    int64  
 5   newcolumn               0 non-null      float64
dtypes: float64(1), int64(4), object(1)
memory usage: 9.5+ KB


In [253]:
df.notna().head(4)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
0,True,True,True,True,True,False
1,True,True,True,True,True,False
2,True,True,True,True,True,False
3,True,True,True,True,True,False


In [258]:
df[df['Genre'].notnull()].shape

(200, 6)

In [270]:
df['Age'].duplicated(keep='first')

0      False
1      False
2      False
3      False
4      False
       ...  
195     True
196     True
197     True
198     True
199     True
Name: Age, Length: 200, dtype: bool

In [286]:
df.drop_duplicates(subset=['Age']).head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
0,1,Male,19,15,39,
1,2,Male,21,15,81,
2,3,Female,20,16,6,
3,4,Female,23,16,77,
4,5,Female,31,17,40,


In [287]:
df['Genre'].unique()

array(['Male', 'Female'], dtype=object)

In [288]:
df['Genre'].nunique()

2

In [290]:
b=df.set_index('CustomerID')

In [294]:
b.reset_index().head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
0,1,Male,19,15,39,
1,2,Male,21,15,81,
2,3,Female,20,16,6,
3,4,Female,23,16,77,
4,5,Female,31,17,40,


In [304]:
df.loc[:4,'Genre':'Age']

Unnamed: 0,Genre,Age
0,Male,19
1,Male,21
2,Female,20
3,Female,23
4,Female,31


In [315]:
df.loc[df['Age']>50].sample()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
8,9,Male,64,19,3,


In [311]:
df.iloc[2:7,:4]

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$)
2,3,Female,20,16
3,4,Female,23,16
4,5,Female,31,17
5,6,Female,22,17
6,7,Female,35,18


In [316]:
df.index

RangeIndex(start=0, stop=200, step=1)

In [317]:
df.columns

Index(['CustomerID', 'Genre', 'Age', 'Annual Income (k$)',
       'Spending Score (1-100)', 'newcolumn'],
      dtype='object')

In [318]:
df.rename(columns={"Age":"age"},inplace=True)

In [319]:
df.columns

Index(['CustomerID', 'Genre', 'age', 'Annual Income (k$)',
       'Spending Score (1-100)', 'newcolumn'],
      dtype='object')

In [320]:
df.rename(columns={"age":"Age"},inplace=True)

In [321]:
df.sample()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
191,192,Female,32,103,69,


In [332]:
df.query('`Age`<30 and `Spending Score (1-100)`<10')

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
2,3,Female,20,16,6,
134,135,Male,20,73,5,
162,163,Male,19,81,5,


In [338]:
df.query(" `Age` in [30,50]").head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
9,10,Female,30,19,72,
37,38,Female,30,34,73,
46,47,Female,50,40,55,
54,55,Female,50,43,45,
89,90,Female,50,58,46,


In [403]:
def incage(x, threshold=10050):
    """Label a customer row as 'senior' or 'junior' based on its ``Age`` value.

    Intended for row-wise use, e.g. ``df.apply(incage, axis=1)``.

    Parameters
    ----------
    x : pandas.Series
        One row of the customers frame; must carry an ``'Age'`` label.
    threshold : numeric, default 10050
        Rows whose age is strictly greater than this are tagged 'senior'.
        NOTE(review): the displayed outputs show Age values offset by 10000
        (e.g. 10019), presumably from a cell that is no longer in the
        notebook; with the raw ages reported by ``describe()`` (18-70) every
        row would be 'junior' at this default -- confirm the intended cut-off.

    Returns
    -------
    pandas.Series
        A copy of ``x`` with an extra ``'c'`` entry set to 'senior' or 'junior'.
    """
    # Look the age up by label: x[2] depended on the column order and on the
    # deprecated integer-position fallback of Series.__getitem__.
    age = x['Age']
    # Work on a copy: pandas does not support functions passed to apply()
    # mutating the object they receive.
    labelled = x.copy()
    labelled['c'] = 'senior' if age > threshold else 'junior'
    return labelled
   



In [392]:
df.apply(incage,axis=1)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn
0,1,Male,10019,15,39,
1,2,Male,10021,15,81,
2,223,Female,10020,16,6,
3,4,Female,10023,16,77,
4,5,Female,10031,17,40,
...,...,...,...,...,...,...
195,196,Female,10035,120,79,
196,197,Female,10045,126,28,
197,198,Male,10032,126,74,
198,199,Male,10032,137,18,


In [404]:
df=df.apply(incage,axis=1)

In [405]:
df['c'].unique()

array(['junior', 'senior'], dtype=object)

In [409]:
df.nlargest(5,columns=['Age','CustomerID'])

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn,c
70,71,Male,10070,49,55,,senior
60,61,Male,10070,46,56,,senior
57,58,Male,10069,44,46,,senior
108,109,Male,10068,63,43,,senior
90,91,Female,10068,59,55,,senior


In [410]:
b=df.head()

In [411]:
c=df.tail()

In [413]:
pd.concat([b,c],axis=1)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100),newcolumn,c,CustomerID.1,Genre.1,Age.1,Annual Income (k$).1,Spending Score (1-100).1,newcolumn.1,c.1
0,1.0,Male,10019.0,15.0,39.0,,junior,,,,,,,
1,2.0,Male,10021.0,15.0,81.0,,junior,,,,,,,
2,223.0,Female,10020.0,16.0,6.0,,junior,,,,,,,
3,4.0,Female,10023.0,16.0,77.0,,junior,,,,,,,
4,5.0,Female,10031.0,17.0,40.0,,junior,,,,,,,
195,,,,,,,,196.0,Female,10035.0,120.0,79.0,,junior
196,,,,,,,,197.0,Female,10045.0,126.0,28.0,,junior
197,,,,,,,,198.0,Male,10032.0,126.0,74.0,,junior
198,,,,,,,,199.0,Male,10032.0,137.0,18.0,,junior
199,,,,,,,,200.0,Male,10030.0,137.0,83.0,,junior


In [419]:
# Both frames join on the same column name, so a single on= replaces the
# identical left_on/right_on pair; how='right' keeps every row of c.
pd.merge(b,c,on='Annual Income (k$)',how='right')

Unnamed: 0,CustomerID_x,Genre_x,Age_x,Annual Income (k$),Spending Score (1-100)_x,newcolumn_x,c_x,CustomerID_y,Genre_y,Age_y,Spending Score (1-100)_y,newcolumn_y,c_y
0,,,,120,,,,196,Female,10035,79,,junior
1,,,,126,,,,197,Female,10045,28,,junior
2,,,,126,,,,198,Male,10032,74,,junior
3,,,,137,,,,199,Male,10032,18,,junior
4,,,,137,,,,200,Male,10030,83,,junior
