# Chapter 6: Data Selection - Series

In [1]:
import pandas as pd

## Introduction to pandas Series

### The Series Index

In [2]:
# Monthly income keyed by three-letter month abbreviation; the mapping's
# insertion order becomes the order of the Series index.
income = pd.Series(
    {
        'Jan': 100,
        'Feb': 125,
        'Mar': 105,
        'Apr': 111,
        'May': 275,
        'Jun': 137,
        'Jul': 99,
        'Aug': 10,
        'Sep': 250,
        'Oct': 100,
        'Nov': 175,
        'Dec': 200,
    },
    name='income',
)
income

Jan    100
Feb    125
Mar    105
Apr    111
May    275
Jun    137
Jul     99
Aug     10
Sep    250
Oct    100
Nov    175
Dec    200
Name: income, dtype: int64

In [3]:
# Drop the month labels and fall back to a default 0..n-1 integer index.
# The result is only displayed, not assigned, so `income` keeps its month index.
income.reset_index(drop=True)

0     100
1     125
2     105
3     111
4     275
5     137
6      99
7      10
8     250
9     100
10    175
11    200
Name: income, dtype: int64

## Data Selection in a pandas Series

### Brackets, dots, Series.loc, and Series.iloc

In [5]:
# Read only the second CSV column (usecols=[1]) with a default integer index,
# then collapse the resulting one-column DataFrame into a Series.
UK_energy = pd.read_csv(
    'Chapter6-Datasets/UK_energy.csv',
    index_col=None,
    usecols=[1],
).squeeze("columns")
print(type(UK_energy))
print(UK_energy.head())

<class 'pandas.core.series.Series'>
0    288.177459
1    316.485721
2    338.565899
3    336.866984
4    332.844765
Name: annual_cost, dtype: float64


In [6]:
# .loc selects by index label; the list picks the rows labelled 2, 4 and 6.
print('UK_energy.loc[[2,4,6]]\n\n', UK_energy.loc[[2,4,6]])

UK_energy.loc[[2,4,6]]

 2    338.565899
4    332.844765
6    341.909881
Name: annual_cost, dtype: float64


In [7]:
# start:stop:step slice inside plain brackets; the stop (7) is excluded,
# so this yields rows 2, 4 and 6.
print('UK_energy[2:7:2]\n\n', UK_energy[2:7:2])

UK_energy[2:7:2]

 2    338.565899
4    332.844765
6    341.909881
Name: annual_cost, dtype: float64


In [8]:
# A list inside plain brackets; with the default integer index this returns
# the rows labelled 2, 4 and 6.
print('UK_energy[[2,4,6]]\n\n', UK_energy[[2,4,6]])

UK_energy[[2,4,6]]

 2    338.565899
4    332.844765
6    341.909881
Name: annual_cost, dtype: float64


In [9]:
# .iloc selects by integer position; the list picks positions 2, 4 and 6.
# The printed label now matches the expression that is actually evaluated
# (the previous label contained a stray comma and was not valid syntax).
print('UK_energy.iloc[[2,4,6]]\n\n', UK_energy.iloc[[2,4,6]])

UK_energy[[,2,4,6]]

 2    338.565899
4    332.844765
6    341.909881
Name: annual_cost, dtype: float64


In [10]:
# .iloc with a start:stop:step slice is positional and excludes the stop,
# so this yields positions 2, 4 and 6.
# The printed label now matches the evaluated expression (a slice cannot be
# written inside a list literal, so the doubled brackets were invalid).
print('UK_energy.iloc[2:7:2]\n\n', UK_energy.iloc[2:7:2])

UK_energy.iloc[[2:7:2]]

 2    338.565899
4    332.844765
6    341.909881
Name: annual_cost, dtype: float64


In [11]:
# Swap the integer index for string labels 'year_1990' ... 'year_2019'
# (30 labels, one per row).
UK_energy.index = ['year_' + str(year) for year in range(1990, 2020)]
UK_energy.index

Index(['year_1990', 'year_1991', 'year_1992', 'year_1993', 'year_1994',
       'year_1995', 'year_1996', 'year_1997', 'year_1998', 'year_1999',
       'year_2000', 'year_2001', 'year_2002', 'year_2003', 'year_2004',
       'year_2005', 'year_2006', 'year_2007', 'year_2008', 'year_2009',
       'year_2010', 'year_2011', 'year_2012', 'year_2013', 'year_2014',
       'year_2015', 'year_2016', 'year_2017', 'year_2018', 'year_2019'],
      dtype='object')

In [12]:
# Dot (attribute-style) access to a single element; this works because the
# label 'year_1997' is a valid Python identifier.
UK_energy.year_1997

326.4184542

In [13]:
# Slicing by label: unlike a positional slice, both endpoints are included
# (year_2011 appears in the result).
UK_energy['year_1997':'year_2011']

year_1997    326.418454
year_1998    306.393163
year_1999    295.687501
year_2000    290.333333
year_2001    283.333333
year_2002    281.666667
year_2003    283.666667
year_2004    291.666667
year_2005    323.666667
year_2006    382.000000
year_2007    423.111111
year_2008    487.333333
year_2009    498.666667
year_2010    484.000000
year_2011    523.181818
Name: annual_cost, dtype: float64

## Exercise 6.01 - basic Series data selection

In [15]:
# Load the CSV and squeeze the resulting one-column DataFrame down to a Series.
BOLD = pd.read_csv('Chapter6-Datasets/PLOS_BOLD_S1_patient_1.csv').squeeze('columns')
BOLD

0      0.783670
1      0.293040
2      0.111169
3     -0.169703
4     -0.147029
         ...   
139    0.723983
140    0.687518
141    0.515671
142    0.432008
143    0.146747
Name: Y, Length: 144, dtype: float64

In [16]:
# Relabel the rows with even integers 0, 2, 4, ... so that index labels no
# longer coincide with row positions.
BOLD.index = range(0, len(BOLD) * 2, 2)
BOLD

0      0.783670
2      0.293040
4      0.111169
6     -0.169703
8     -0.147029
         ...   
278    0.723983
280    0.687518
282    0.515671
284    0.432008
286    0.146747
Name: Y, Length: 144, dtype: float64

In [17]:
# Keep every second sample (positions 0, 2, 4, ...); the surviving index
# labels therefore advance in steps of 4.
B2 = BOLD[::2]
B2

0      0.783670
4      0.111169
8     -0.147029
12    -0.032271
16    -0.202202
         ...   
268   -0.014538
272    0.180167
276    0.382172
280    0.687518
284    0.432008
Name: Y, Length: 72, dtype: float64

In [18]:
# Walk backwards from the end of B2 using a negative step. The start
# (len(B2)) is one past the last position and the stop (len(B2) - 10) is
# excluded, so this returns 9 rows rather than 10.
# NOTE(review): if the last ten values were intended, the stop would need to
# be len(B2) - 11 — confirm the intent before changing.
B2[len(B2):(len(B2)-10):-1]

284    0.432008
280    0.687518
276    0.382172
272    0.180167
268   -0.014538
264   -0.080900
260    0.069567
256    0.153728
252    0.220703
Name: Y, dtype: float64