In [1]:
# THE PANDAS SERIES
# The pandas SERIES is the base data structure of pandas. A series is similar to a NumPy array, 
# but it differs by having an index, which allows for much richer lookup of items
# instead of just a zero-based array index value.

In [2]:
import pandas as pd

In [3]:
# Create a four-item Series from a plain Python list.
s = pd.Series([1, 2, 3, 4])
print(s, '\n')
# The output consists of two columns: the first is the index label and the
# second is the value associated with that label.
#
# Because this Series was created without specifying an index (something we
# will do next), pandas automatically creates an integer index with labels
# starting at 0 and increasing by one for each data item.

# Get the value at label 1. `.loc` makes it explicit that this is a lookup by
# label, not by position.
print(s.loc[1], '\n')
# Retrieve multiple items by passing a list of labels; this returns a new
# Series containing the rows labelled 1 and 3.
print(s.loc[[1, 3]])

0    1
1    2
2    3
3    4
dtype: int64 

2 

1    2
3    4
dtype: int64


In [4]:
# A Series can be created with a user-defined index by passing the `index`
# parameter and specifying the index labels. The following creates a Series
# with the same values as before but with an index made of string labels.
s = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
print(s, '\n')

# Data in the Series can now be accessed by those string labels. The
# following retrieves the values at index labels 'a' and 'd'.
print(s.loc[['a', 'd']], '\n')

# Elements can still be selected by their 0-based position, but that must go
# through `.iloc`. Plain `s[[0, 3]]` on a non-integer index relies on a
# positional fallback that recent pandas releases deprecate (integer keys are
# to be treated as labels), so it should not be used.
print(s.iloc[[0, 3]], '\n')

# The index of a Series can be examined through its `.index` property.
print(s.index)

a    1
b    2
c    3
d    4
dtype: int64 

a    1
d    4
dtype: int64 

a    1
d    4
dtype: int64 

Index(['a', 'b', 'c', 'd'], dtype='object')


In [5]:
# A common use of a Series is a time series: values associated with a
# date/time index. Start by building an index with one entry per day between
# two dates (both endpoints are included, as the printed output shows).
dates_index = pd.date_range('2019-01-01', '2019-01-10')
# `pd.date_range` creates a DatetimeIndex, a specialized pandas index that is
# optimized for indexing data by dates and times.

# Create a Series with one value (representing a temperature) for each day
# in the index.
temps = pd.Series([78, 89, 45, 67, 67, 58, 48, 83, 87, 34], index=dates_index)
print(temps)
# A Series with a DatetimeIndex is referred to as a time series.

# Look up the temperature on a specific date by using the date string as the
# label, e.g. the temperature for 2019-01-06.
print('the temp for 2019-01-06 is', temps.loc['2019-01-06'])

2019-01-01    78
2019-01-02    89
2019-01-03    45
2019-01-04    67
2019-01-05    67
2019-01-06    58
2019-01-07    48
2019-01-08    83
2019-01-09    87
2019-01-10    34
Freq: D, dtype: int64
the temp for 2019-01-06 is 58


In [6]:
# An arithmetic operation can be applied between two Series. The following
# creates a second Series and calculates the difference in temperature
# between the two.

# Create a second Series of temperatures that uses the same date index.
temp2 = pd.Series([48, 39, 42, 76, 37, 28, 48, 93, 21, 60], index=dates_index)

# Subtraction aligns the two Series by their index labels and calculates the
# difference at each matching label.
temp_diffs = temps - temp2
print(temp_diffs)

# Note: the result of an arithmetic operation (+, -, /, *, ...) between two
# Series is another Series.

# Values can also be looked up by 0-based position. Use `.iloc` for that:
# plain `temp_diffs[3]` on a non-integer index relies on a positional fallback
# that recent pandas releases deprecate.
temp_diffs.iloc[3]

2019-01-01    30
2019-01-02    50
2019-01-03     3
2019-01-04    -9
2019-01-05    30
2019-01-06    30
2019-01-07     0
2019-01-08   -10
2019-01-09    66
2019-01-10   -26
Freq: D, dtype: int64


-9

In [7]:
# Series expose descriptive-statistics methods. Compute the mean of the
# temperature differences and show it as the cell's result.
mean_temp_diff = temp_diffs.mean()
mean_temp_diff

16.4