In [1]:
## pandas Series
# A Series is a one-dimensional labelled array
# that can hold values of any data type.

# Create a Series from a plain Python list
import pandas as pd
print(pd.Series([1,2,3,4]))   # default labels 0..3; dtype is int64 in the recorded output below

0    1
1    2
2    3
3    4
dtype: int64


In [2]:
# A list of booleans produces a Series whose dtype is bool
print(pd.Series([True, False, True]))     # dtype = bool

0     True
1    False
2     True
dtype: bool


In [3]:
# Mixed types: the recorded output shows the Series falls back to dtype object
print(pd.Series([True, 1, 'name', 9.8]))
# A Series can hold different data types, but the intention is to keep the data homogeneous
# so that mathematical and other common operations can be performed effectively.

0    True
1       1
2    name
3     9.8
dtype: object


In [4]:
# Create a Series with a custom index: the labels run 4, 3, 2, 1 instead of the default 0..3.
# Build it once and reuse it rather than constructing the same Series twice.
cu_series = pd.Series([1, 2, 3, 4], index=[4, 3, 2, 1])
print(cu_series)
# With an integer index, cu_series[4] is a *label* lookup, not a positional one.
# .loc states that intent explicitly: label 4 belongs to the first row, whose value is 1.
print(cu_series.loc[4])

4    1
3    2
2    3
1    4
dtype: int64
1


In [5]:
# A generator expression (comprehension syntax without the brackets) can be passed directly.
# Here every even n in 1..9 is doubled, giving 4, 8, 12, 16.
print(pd.Series(n*2 for n in range(1,10) if n % 2 == 0))

0     4
1     8
2    12
3    16
dtype: int64


In [6]:
# Build a Series from a dictionary
dictt = dict([('my', 'name'), ('is', 'Kishan')])
print(dictt)
# The dictionary keys become the index labels of the resulting Series
print(pd.Series(data=dictt))

{'my': 'name', 'is': 'Kishan'}
my      name
is    Kishan
dtype: object


In [8]:
# Load data from a CSV file (read_csv returns a DataFrame, as the recorded output shows)
# The CSV should be created with a proper tool, otherwise we may end up with endless spurious columns
# This file has a header row
data = pd.read_csv('sample_csv.csv')
print(type(data))

<class 'pandas.core.frame.DataFrame'>


In [9]:
# Read a CSV that has no header row: header=None tells pandas not to use the first line as column names
data_h = pd.read_csv('header_false.csv', header=None)
# Type returned by read_csv -- inspect data_h, the frame loaded in this cell
# (the original printed `data` from the previous cell, which only worked through leftover kernel state)
print(type(data_h))

<class 'pandas.core.frame.DataFrame'>


In [10]:
# Read a single-column CSV as a Series instead of a DataFrame.
# The file must contain exactly one column; otherwise the result stays a DataFrame.
# read_csv(squeeze=True) was deprecated in pandas 1.4 and removed in 2.0,
# so squeeze the columns axis of the returned frame instead.
data_s = pd.read_csv('header_false.csv', header=None).squeeze('columns')
print(type(data_s))

<class 'pandas.core.series.Series'>


In [11]:
# A multi-column CSV file is read as a DataFrame (see the recorded output)
data_m = pd.read_csv('multi_clm.csv')
print(type(data_m))

<class 'pandas.core.frame.DataFrame'>


In [12]:
# Get a Series out of a multi-column CSV: keep only the 'Day' column via usecols,
# then squeeze the resulting one-column DataFrame into a Series.
# (read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in 2.0.)
data_m_series = pd.read_csv('multi_clm.csv', usecols=['Day']).squeeze('columns')
print(type(data_m_series))

<class 'pandas.core.series.Series'>


In [13]:
## Attributes and methods of a Series
import pandas as pd
# Load the single 'Day' column and squeeze it into a Series; the cells below inspect it.
# (read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in 2.0.)
dataframe_series = pd.read_csv('multi_clm.csv', usecols=['Day']).squeeze('columns')
print(dataframe_series)

0    1
1    2
2    3
3    3
Name: Day, dtype: int64


In [14]:
# Indexing mechanism
# Shows how the index is assigned: its start, stop and step
print(dataframe_series.index)    # .index is an attribute
# A name accessed without () is an attribute, not a method

RangeIndex(start=0, stop=4, step=1)


In [15]:
# Values of the Series, without the index labels (printed as an array below)
dataframe_series_val = dataframe_series.values
print(dataframe_series_val)

[1 2 3 3]


In [16]:
# Dimension of the Series
print(dataframe_series.ndim)   # 1 - a Series is one-dimensional

1


In [18]:
# Data type of the elements stored in the Series
print(dataframe_series.dtype)

int64


In [19]:
# Size of the Series: the number of elements it holds
print(dataframe_series.size)

4


In [20]:
# Check whether every element in the Series is unique
print(dataframe_series.is_unique)    # If any element of the Series occurs more than once,
# this returns False (here the value 3 appears twice)

False


In [21]:
# Check whether the values are monotonically increasing / decreasing
# The check is non-strict: the repeated 3 at the end still counts as increasing
print(dataframe_series.is_monotonic_increasing)
print(dataframe_series.is_monotonic_decreasing)

True
False


In [22]:
# Methods on a Series
# Add a prefix to every index label
print(dataframe_series.add_prefix('game '))

game 0    1
game 1    2
game 2    3
game 3    3
Name: Day, dtype: int64


In [23]:
# A suffix can be appended to every index label in the same way
print(dataframe_series.add_suffix(' gama'))

0 gama    1
1 gama    2
2 gama    3
3 gama    3
Name: Day, dtype: int64


In [24]:
# Sum of all values in the Series (1 + 2 + 3 + 3)
print(dataframe_series.sum())

9


In [25]:
# Find the index label of the maximum element in the Series
print(dataframe_series.idxmax())   # Compares all elements of the Series and, on ties,
# returns the first occurrence of the maximum (label 2 here, although label 3 is also 3)

2


In [26]:
# Find the index label of the minimum element in the Series
print(dataframe_series.idxmin())   # Compares all elements of the Series and
# returns the label of the minimum element (label 0 here)

0


In [27]:
# head() - the first 5 values of the Series (this Series only has 4, so all are shown)
head = dataframe_series.head()   # An integer can be passed to head() to change how many rows are returned
print(head)

0    1
1    2
2    3
3    3
Name: Day, dtype: int64


In [28]:
# tail() - the last 5 values of the Series (this Series only has 4, so all are shown)
tail = dataframe_series.tail()
print(tail)

0    1
1    2
2    3
3    3
Name: Day, dtype: int64


In [29]:
# Average (arithmetic mean) of the Series: 9 / 4
print(dataframe_series.mean())

2.25


In [30]:
# Multiply all elements together (1 * 2 * 3 * 3)
print(dataframe_series.product())

18


In [31]:
 # Find the standard deviation
print(dataframe_series.std())   # Spread of the values around the mean; the recorded result matches the sample (n - 1) formula

0.9574271077563381


In [32]:
# Shape of the Series: a one-element tuple holding its length
print(dataframe_series.shape)

(4,)


In [33]:
# The statistics above were computed one at a time; describe() gathers them in one call.
# It needs an int or float Series and hands back another Series keyed by statistic name.
description = dataframe_series.describe()
# Print each statistic in the same order as before
for stat_name in ('count', 'mean', 'std', 'min', 'max', '25%', '50%', '75%'):
    print(description[stat_name])

4.0
2.25
0.9574271077563381
1.0
3.0
1.75
2.5
3.0


In [34]:
# Sample data for the sorting examples: one temperature per month
temp = [30, 31, 32, 33, 34, 32, 30, 31, 29, 33, 34, 32]
month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# The first argument is the data; the month names are the index labels.
# Passing index= by keyword keeps the two roles from being confused.
data_series = pd.Series(temp, index=month)

In [35]:
# Sort by value: first ascending, then descending.
# sort_values returns a new Series each time, so data_series itself is left untouched.
for ascending_order in (True, False):
    print(data_series.sort_values(ascending=ascending_order))

Sep    29
Jan    30
Jul    30
Feb    31
Aug    31
Mar    32
Jun    32
Dec    32
Apr    33
Oct    33
May    34
Nov    34
dtype: int64
Nov    34
May    34
Oct    33
Apr    33
Dec    32
Jun    32
Mar    32
Aug    31
Feb    31
Jul    30
Jan    30
Sep    29
dtype: int64


In [36]:
# Make the change permanent by assigning the sorted result back to the same name
data_series = data_series.sort_values()
print(data_series)   # data_series now holds the sorted values

Sep    29
Jan    30
Jul    30
Feb    31
Aug    31
Mar    32
Jun    32
Dec    32
Apr    33
Oct    33
May    34
Nov    34
dtype: int64


In [37]:
# If we do not want to re-assign every time, inplace=True makes the change permanent on the object itself.
# NOTE: data_series was already sorted by the previous cell, so the output here looks the same.
# NOTE(review): inplace=True is generally discouraged (no performance benefit, breaks chaining);
# it is kept here only because this cell demonstrates the option.
data_series.sort_values(inplace = True)
print(data_series)  # Done

Sep    29
Jan    30
Jul    30
Feb    31
Aug    31
Mar    32
Jun    32
Dec    32
Apr    33
Oct    33
May    34
Nov    34
dtype: int64


In [38]:
# Sort by index label instead of by value
print(data_series.index)                            # current label order (left over from the value sort)
print(data_series.sort_index())                     # labels in ascending alphabetical order
print(data_series.sort_index(ascending = False))    # labels in descending alphabetical order

Index(['Sep', 'Jan', 'Jul', 'Feb', 'Aug', 'Mar', 'Jun', 'Dec', 'Apr', 'Oct',
       'May', 'Nov'],
      dtype='object')
Apr    33
Aug    31
Dec    32
Feb    31
Jan    30
Jul    30
Jun    32
Mar    32
May    34
Nov    34
Oct    33
Sep    29
dtype: int64
Sep    29
Oct    33
Nov    34
May    34
Mar    32
Jun    32
Jul    30
Jan    30
Feb    31
Dec    32
Aug    31
Apr    33
dtype: int64


In [39]:
# Python built-in functions on a Series: rebuild the unsorted sample data first
temp = [30, 31, 32, 33, 34, 32, 30, 31, 29, 33, 34, 32]
month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# The first argument is the data; the month names are the index labels.
# Passing index= by keyword keeps the two roles from being confused.
data_series = pd.Series(temp, index=month)

In [40]:
# Length: len() gives the number of elements in the Series
print(len(data_series))

12


In [41]:
# Type of the object itself (a pandas Series), as opposed to the dtype of its elements
print(type(data_series))

<class 'pandas.core.series.Series'>


In [42]:
# Type conversion with Python built-ins
list_series = list(data_series)   # list of the Series values
dict_series = dict(data_series)   # presumably maps each index label to its value -- print dict_series to confirm

In [43]:
# sorted() returns the values in ascending order as a plain Python list (index labels are dropped)
print(sorted(data_series))

[29, 30, 30, 31, 31, 32, 32, 32, 33, 33, 34, 34]


In [44]:
# Largest and smallest value via the built-in max() and min()
print(max(data_series))
print(min(data_series))

34
29


In [45]:
## Extract values from a Series
temp = [30, 31, 32, 33, 34, 32, 30, 31, 29, 33, 34, 32]
month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# The first argument is the data; the month names are the index labels.
# Passing index= by keyword keeps the two roles from being confused.
data_series = pd.Series(temp, index=month)

In [46]:
# Slicing and selection
print(data_series[:])        # every element
print(data_series.iloc[2:])  # from position 2 (the third element) to the end
print(data_series.iloc[:3])  # the first three elements
# Positional slices work the same way as on a list; .iloc makes the positional intent explicit.
# Grab individual elements by label; .loc makes the label-based intent explicit.
print(data_series.loc[['Jan', 'Feb']])

Jan    30
Feb    31
Mar    32
Apr    33
May    34
Jun    32
Jul    30
Aug    31
Sep    29
Oct    33
Nov    34
Dec    32
dtype: int64
Mar    32
Apr    33
May    34
Jun    32
Jul    30
Aug    31
Sep    29
Oct    33
Nov    34
Dec    32
dtype: int64
Jan    30
Feb    31
Mar    32
dtype: int64
Jan    30
Feb    31
dtype: int64


In [47]:
# Count how many times each distinct value occurs in the Series
gender_series = pd.Series(data=['Male', 'Female', 'Female', 'Male'])
print(gender_series.value_counts())

Female    2
Male      2
dtype: int64


In [48]:
# apply and map: rebuild the sample data used by the next cells
temp = [30, 31, 32, 33, 34, 32, 30, 31, 29, 33, 34, 32]
month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# The first argument is the data; the month names are the index labels.
# Passing index= by keyword keeps the two roles from being confused.
data_series = pd.Series(temp, index=month)

In [49]:
# A Series can be run through a function we write ourselves.
# apply() hands every element of the Series to that function, one at a time.
def converter(C):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit.

    C is the Celsius value; the return value is C * 1.8 + 32.
    """
    scaled = C * 1.8
    return scaled + 32

# Apply converter element-wise; the result is a new float Series with the same month labels
print(data_series.apply(converter))

Jan    86.0
Feb    87.8
Mar    89.6
Apr    91.4
May    93.2
Jun    89.6
Jul    86.0
Aug    87.8
Sep    84.2
Oct    91.4
Nov    93.2
Dec    89.6
dtype: float64


In [50]:
# map
# Takes a dictionary as its argument
# and replaces each value in the Series with the dictionary entry for that value.
# Values that have no matching key become NaN (which is why the result below is float64).

dicti = {30: 300, 31: 310, 32: 100}
print(data_series.map(dicti))

Jan    300.0
Feb    310.0
Mar    100.0
Apr      NaN
May      NaN
Jun    100.0
Jul    300.0
Aug    310.0
Sep      NaN
Oct      NaN
Nov      NaN
Dec    100.0
dtype: float64
