In [1]:
import pandas as pd
import numpy as np

## Series

In [2]:
# Build a Series from a plain Python list; a default 0..n-1 index is assigned.
# Constructor form: pd.Series(data, index, dtype)

number_words = ["One", "Two", "Three", "Four", "Five"]
series1 = pd.Series(number_words)
print(series1)

0      One
1      Two
2    Three
3     Four
4     Five
dtype: object


In [3]:
# Broadcast a single scalar across an explicit index so that every
# element of the series holds the same value.

scalar_index = list(range(5))
seriesScalar = pd.Series(100, index=scalar_index)
print(seriesScalar)

0    100
1    100
2    100
3    100
4    100
dtype: int64


In [4]:
# A dictionary becomes a Series whose index labels are the dict keys.

person = {"Name": "Kiran", "Job": "Developer", "Born": 1950}
seriesDict = pd.Series(person)
print(seriesDict)

Name        Kiran
Job     Developer
Born         1950
dtype: object


In [5]:
# Create an empty series.
# The dtype is passed explicitly: older pandas releases emit a
# DeprecationWarning (and fall back to float64) when an empty Series is
# constructed without one, so this keeps the result stable across versions.

empty = pd.Series(dtype=object)
print(empty)

Series([], dtype: object)


In [6]:
# Attribute
# `empty` is True when the series holds no elements, False otherwise.
# The empty series is given an explicit dtype so that no DeprecationWarning
# is raised on pandas versions that require one.

empty_series = pd.Series(dtype=object)
print(empty_series.empty)

non_empty_series = pd.Series([10, 20, 30])
print(non_empty_series.empty)

True
False


In [7]:
# Attribute
# `hasnans` is True when the series contains at least one NaN value.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.

data = [10, 20, 30, 40, np.nan, 0]
data1 = [10, 20, 30, 40, 50, 0]

series5 = pd.Series(data)
series51 = pd.Series(data1)

print(series5.hasnans)
print(series51.hasnans)

True
False


In [8]:
# Attributes
# The `values` attribute returns the elements of the series as a NumPy array.

series1.values

array(['One', 'Two', 'Three', 'Four', 'Five'], dtype=object)

In [9]:
# Attributes
# `ndim` returns the number of dimensions; a series is one-dimensional.

series1.ndim

1

In [10]:
# Attributes
# For a series, `size` returns the number of elements.

series1.size

5

In [11]:
# Attributes
# `dtype` returns the data type of the elements.
# String elements are stored with the generic object dtype, shown as 'O'.

series1.dtype

dtype('O')

In [12]:
# Attribute
# `name` holds the label of the series; rename() with a scalar returns a
# new series carrying the new name and leaves the original untouched.

seriesName = pd.Series([1, 2, 3, 4, 5], name="Custom_Name")
seriesName2 = seriesName.rename("New_name")

for named_series in (seriesName, seriesName2):
    print(named_series.name)

Custom_Name
New_name


In [13]:
# Attributes
# `shape` returns a tuple with the length of each dimension.
# A series is one-dimensional, so the tuple has one entry: (element_count,)

series1.shape

(5,)

In [14]:
# `index` returns the index object of the series; here it is a RangeIndex.
# RangeIndex is a memory-saving special case of an Index
# limited to representing monotonic ranges with a 64-bit dtype.

series1.index
# Expected: RangeIndex(start=0, stop=5, step=1)

RangeIndex(start=0, stop=5, step=1)

In [15]:
# The `index` attribute exposes the labels of a series; for a series built
# from a dict these are the dict keys.

job_record = {"Name": "Kiran", "Job": "Developer", "Born": 1950}
seriesDict = pd.Series(job_record)
print(seriesDict.index)

Index(['Name', 'Job', 'Born'], dtype='object')


In [16]:
# info() prints a summary of the series: its class, index range, name,
# non-null count, dtype and memory usage.

series1.info()

<class 'pandas.core.series.Series'>
RangeIndex: 5 entries, 0 to 4
Series name: None
Non-Null Count  Dtype 
--------------  ----- 
5 non-null      object
dtypes: object(1)
memory usage: 168.0+ bytes


In [17]:
# describe() returns summary statistics of the series.
# For this object-dtype series the result lists count, unique, top and freq.

series1.describe()

count       5
unique      5
top       One
freq        1
dtype: object

In [18]:
# Access single elements of a series through their index label.

data1 = [10, 20, 30, 40, 50]
series2 = pd.Series(data1)
print(data1)

# Labels 0 and 4 are the first and last entries of the default index.
elem1, elem2 = series2[0], series2[4]
print(elem1, elem2, sep="\n")

[10, 20, 30, 40, 50]
10
50


In [19]:
# Build a series from a NumPy range: the multiples of 5 from 0 up to 95.

multiples_of_five = np.arange(0, 100, 5)
series3 = pd.Series(multiples_of_five)
print(series3)

0      0
1      5
2     10
3     15
4     20
5     25
6     30
7     35
8     40
9     45
10    50
11    55
12    60
13    65
14    70
15    75
16    80
17    85
18    90
19    95
dtype: int32


In [20]:
# Slice [:stopIndex]
# Returns the elements from the start (position 0) up to, but not
# including, stopIndex.

first_five = series3[:5]
print(first_five)

0     0
1     5
2    10
3    15
4    20
dtype: int32


In [21]:
# Slice [startIndex:]
# Returns every element from startIndex through the end of the series.

from_fifth_on = series3[5:]
print(from_fifth_on)

5     25
6     30
7     35
8     40
9     45
10    50
11    55
12    60
13    65
14    70
15    75
16    80
17    85
18    90
19    95
dtype: int32


In [22]:
# Slice [startIndex:stopIndex]
# Returns the elements from startIndex up to, but not including, stopIndex.

middle_part = series3[5:10]
print(middle_part)

5    25
6    30
7    35
8    40
9    45
dtype: int32


In [23]:
# Slice [startIndex:-1]
# Returns the elements from startIndex up to, but not including, the
# last element.

tail_without_last = series3[15:-1]
print(tail_without_last)

15    75
16    80
17    85
18    90
dtype: int32


In [24]:
# Slice [:-n]
# Returns the elements from the start while leaving out the last n
# elements (here the last 15 of 20, so 5 remain).

all_but_last_fifteen = series3[:-15]
print(all_but_last_fifteen)

0     0
1     5
2    10
3    15
4    20
dtype: int32


In [25]:
# Create a series whose index consists of custom string labels.

labels = ["One", "Two", "Three", "Four", "Five"]
seriesLabel = pd.Series([1, 2, 3, 4, 5], index=labels)
print(seriesLabel, seriesLabel.index, sep="\n")

One      1
Two      2
Three    3
Four     4
Five     5
dtype: int64
Index(['One', 'Two', 'Three', 'Four', 'Five'], dtype='object')


In [26]:
# count() reports how many non-missing elements the series holds.

label_names = ["One", "Two", "Three", "Four", "Five"]
seriesLabel = pd.Series([1, 2, 3, 4, 5], index=label_names)
element_count = seriesLabel.count()
print(element_count)

5


In [27]:
# count() only counts non-missing elements, so NaN entries are skipped.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.

data = [10, 20, 30, 40, 50, 0]
series5 = pd.Series(data)
print(series5.count())

data = [10, 20, 30, 40, np.nan, 0]
series5 = pd.Series(data)
print(series5.count())

6
5


In [28]:
# Collect the distinct values of the series with pd.unique; the result is
# an array holding each value once.

repeated_values = [4, 4, 4, 5, 5, 5, 6, 6, 6]
series4 = pd.Series(repeated_values)
series4_u = pd.unique(series4)
print(series4_u)

[4 5 6]


In [29]:
# nunique() returns how many distinct values the series contains.

series4 = pd.Series([4, 4, 4, 5, 5, 5, 6, 6, 6])
distinct_count = series4.nunique()
print(distinct_count)

3


In [30]:
# `is_unique` is an attribute that is True only when no value in the
# series is repeated.

for candidate in ([4, 4, 4, 5, 5, 5, 6, 6, 6], [4, 7, 6, 5, 9, 8, 1, 2, 3]):
    series4 = pd.Series(candidate)
    print(series4.is_unique)

False
True


In [47]:
# Add up all elements of the series, once with the sum() method and once
# with NumPy's np.sum for comparison.

series5 = pd.Series([4, 4, 4, 5, 5, 5, 6, 6, 6])
sum5 = series5.sum()
numpy_total = np.sum(series5)
print(sum5, numpy_total, sep="\n")

45
45


In [49]:
# Multiply all elements of the series together with the product() method.

factors = [4, 1, 5, 2, 10]
series5 = pd.Series(factors)
product5 = series5.product()

# Note: np.multiply does not compute a product of the elements; it scales
# every element by 10 and returns a series of the same length.
scaled_by_ten = np.multiply(series5, 10)
print(product5, scaled_by_ten, sep="\n")

400
0     40
1     10
2     50
3     20
4    100
dtype: int64


In [33]:
# mean() returns the arithmetic average of the elements.

sample_values = [4, 4, 4, 5, 5, 5, 6, 6, 6]
series5 = pd.Series(sample_values)
mean5 = series5.mean()
print(mean5)

5.0


In [34]:
# median() returns the middle value of the sorted elements.

sample_values = [4, 4, 4, 5, 5, 5, 6, 6, 6]
series5 = pd.Series(sample_values)
median5 = series5.median()
print(median5)

5.0


In [35]:
# The `array` attribute exposes the pandas array that backs the series.

small_ints = [4, 4, 4, 5, 5, 5, 6, 6, 6]
series4 = pd.Series(small_ints, dtype=np.int16)
series4_arr = series4.array
print(series4_arr)

<PandasArray>
[4, 4, 4, 5, 5, 5, 6, 6, 6]
Length: 9, dtype: int16


In [36]:
# Inspect the array behind a series: its length and its element dtype.
small_ints = [4, 4, 4, 5, 5, 5, 6, 6, 6]
series4 = pd.Series(small_ints, dtype=np.int16)
series4_arr = series4.array
print(len(series4_arr), series4_arr.dtype, sep="\n")

9
int16


In [37]:
# abs() returns a new series holding the absolute value of every element.

data = np.linspace(-1, 1, 10)
series5 = pd.Series(data)
print(series5, series5.abs(), sep="\n\n")

0   -1.000000
1   -0.777778
2   -0.555556
3   -0.333333
4   -0.111111
5    0.111111
6    0.333333
7    0.555556
8    0.777778
9    1.000000
dtype: float64

0    1.000000
1    0.777778
2    0.555556
3    0.333333
4    0.111111
5    0.111111
6    0.333333
7    0.555556
8    0.777778
9    1.000000
dtype: float64


In [38]:
# std() returns the standard deviation of the elements.

data = np.linspace(0, 30, num=10)
series5 = pd.Series(data)
spread = series5.std()
print(spread)

10.09216784699164


In [39]:
# min() returns the smallest element of the series.

data = np.linspace(10, 30, num=10)
series5 = pd.Series(data)
print(series5, series5.min(), sep="\n\n")

0    10.000000
1    12.222222
2    14.444444
3    16.666667
4    18.888889
5    21.111111
6    23.333333
7    25.555556
8    27.777778
9    30.000000
dtype: float64

10.0


In [40]:
# max() returns the largest element of the series.

data = np.linspace(10, 30, num=10)
series5 = pd.Series(data)
print(series5, series5.max(), sep="\n\n")

0    10.000000
1    12.222222
2    14.444444
3    16.666667
4    18.888889
5    21.111111
6    23.333333
7    25.555556
8    27.777778
9    30.000000
dtype: float64

30.0


In [41]:
# Boolean-mask filtering: a comparison on the series yields an element-wise
# mask, and indexing with that mask keeps only the matching elements.
data = np.arange(0, 100, 5)
series6 = pd.Series(data)

above_80 = series6[series6 > 80]
below_10 = series6[series6 < 10]
equal_50 = series6[series6 == 50]
print(above_80, below_10, equal_50, sep="\n\n")

17    85
18    90
19    95
dtype: int32

0    0
1    5
dtype: int32

10    50
dtype: int32


In [42]:
# Element-wise power: square every element of each filtered selection.
above_80 = series6[series6 > 80]
below_10 = series6[series6 < 10]
equal_50 = series6[series6 == 50]
print(above_80 ** 2, below_10 ** 2, equal_50 ** 2, sep="\n\n")

17    7225
18    8100
19    9025
dtype: int32

0     0
1    25
dtype: int32

10    2500
dtype: int32


In [43]:
# Element-wise addition: add 100 to every element of each filtered selection.
above_80 = series6[series6 > 80]
below_10 = series6[series6 < 10]
equal_50 = series6[series6 == 50]
print(above_80 + 100, below_10 + 100, equal_50 + 100, sep="\n\n")

17    185
18    190
19    195
dtype: int32

0    100
1    105
dtype: int32

10    150
dtype: int32


In [44]:
# Element-wise subtraction: subtract 50 from every element of each
# filtered selection.
above_80 = series6[series6 > 80]
below_10 = series6[series6 < 10]
equal_50 = series6[series6 == 50]
print(above_80 - 50, below_10 - 50, equal_50 - 50, sep="\n\n")

17    35
18    40
19    45
dtype: int32

0   -50
1   -45
dtype: int32

10    0
dtype: int32


In [45]:
# Element-wise true division: divide every element of each filtered
# selection by 10; the result is a float series.
above_80 = series6[series6 > 80]
below_10 = series6[series6 < 10]
equal_50 = series6[series6 == 50]
print(above_80 / 10, below_10 / 10, equal_50 / 10, sep="\n\n")

17    8.5
18    9.0
19    9.5
dtype: float64

0    0.0
1    0.5
dtype: float64

10    5.0
dtype: float64


In [46]:
# Sum the elements of `data` with NumPy.
# NOTE(review): `data` is reassigned in several cells; this presumably relies
# on the np.arange(0, 100, 5) array from the filtering cell - confirm the
# execution order before re-running.
np.sum(data)

950

In [51]:
# Check whether a value is present in a series.
# `x in series` tests the index labels, not the stored values, so the
# membership test is run against `.values` to look at the elements themselves.

print(100 in series6.values)
print(10 in series6.values)

False
True
