# PANDAS

#Working with Pandas Series

In [2]:
import pandas as pd
import numpy as np
print(f"pandas version : {pd.__version__}")

pandas version : 1.5.3


#Creating Series

In [3]:
#series using list
lst=[3,4,5,6,7]
pd.Series(lst,index=[i for i in range(1,len(lst)+1)])

1    3
2    4
3    5
4    6
5    7
dtype: int64

In [4]:
#series using numpy array
data=pd.Series(np.array(lst))
print(data)


0    3
1    4
2    5
3    6
4    7
dtype: int32


In [5]:
#series using Dictionary
run_km = {'day_1' : 4000 , 'day_2': 5000 ,'day_3':3000}
print(run_km)
pd.Series(run_km)

{'day_1': 4000, 'day_2': 5000, 'day_3': 3000}


day_1    4000
day_2    5000
day_3    3000
dtype: int64

In [6]:
#using repeat function along with creating a series
#Pandas.Series.repeat() function repeats elements of a series.It returns a new series where each element of the current 
#series is repeated Consecutively a given number of times
repeated_series=pd.Series(4).repeat(10)
print(repeated_series)

0    4
0    4
0    4
0    4
0    4
0    4
0    4
0    4
0    4
0    4
dtype: int64


In [7]:
#use reset function to change the index accurately
pd.Series(3).repeat(3).reset_index(drop=True)

0    3
1    3
2    3
dtype: int64

In [8]:
#Code Indicates:
#1) 10 should be repeated 5 times and
#2) 20 should be repeated 2 times
s = pd.Series([10,20]).repeat([5,2]).reset_index(drop=True)

In [9]:
s

0    10
1    10
2    10
3    10
4    10
5    20
6    20
dtype: int64

In [10]:
lst=[10,20,30,1,34,23,21,22,1]
a=pd.Series(lst).reset_index(drop=True)
a

0    10
1    20
2    30
3     1
4    34
5    23
6    21
7    22
8     1
dtype: int64

In [11]:
s

0    10
1    10
2    10
3    10
4    10
5    20
6    20
dtype: int64

In [13]:
#Accessing Element
print(s[0])
print("-"*30)
print(s[5])
print("-"*30)
print(s[::-1])
print("-"*30)
print(s[1:])
print("-"*30)
print(s[-1:])

10
------------------------------
20
------------------------------
6    20
5    20
4    10
3    10
2    10
1    10
0    10
dtype: int64
------------------------------
1    10
2    10
3    10
4    10
5    20
6    20
dtype: int64
------------------------------
6    20
dtype: int64


In [14]:
b=pd.Series([3,4,5,6],index=[23,6,78,54])
b

23    3
6     4
78    5
54    6
dtype: int64

In [15]:
b[1:3]

6     4
78    5
dtype: int64

In [16]:
b[-1:]

54    6
dtype: int64

In [17]:
b[::-1]

54    6
78    5
6     4
23    3
dtype: int64

In [18]:
#Aggregate Function
sr=pd.Series([1,2,5,64,77,44,979,54])
print(sr.describe())
print("-"*30)
print(sr.agg([min,max,sum]))

count      8.000000
mean     153.250000
std      334.976225
min        1.000000
25%        4.250000
50%       49.000000
75%       67.250000
max      979.000000
dtype: float64
------------------------------
min       1
max     979
sum    1226
dtype: int64


In [19]:
#Series absolute Function
sr=pd.Series([1,-2,-3,332,-43,-22,43])
sr.abs()

0      1
1      2
2      3
3    332
4     43
5     22
6     43
dtype: int64

In [20]:
#Appending Series
sr1=pd.Series([1,34,676,43,88,-545,43,45])
sr2=pd.Series([32,56,77,-22,-76,-54,-32,85])

In [21]:
s3=sr1.append(sr2,ignore_index=True)
print(s3)
print(type(s3))

0       1
1      34
2     676
3      43
4      88
5    -545
6      43
7      45
8      32
9      56
10     77
11    -22
12    -76
13    -54
14    -32
15     85
dtype: int64
<class 'pandas.core.series.Series'>


  s3=sr1.append(sr2,ignore_index=True)


In [23]:
sr4=sr2.append(sr1,ignore_index=False)
print(sr4)
print(type(sr4))

0     32
1     56
2     77
3    -22
4    -76
5    -54
6    -32
7     85
0      1
1     34
2    676
3     43
4     88
5   -545
6     43
7     45
dtype: int64
<class 'pandas.core.series.Series'>


  sr4=sr2.append(sr1,ignore_index=False)


In [24]:
#Astype function
#Pandas "astype()" is one of the most important methods. It is used to change data type of a series. when dataframe is
#from csv file, the columns are imported and data type is set automatically which many times is not what
#it actually should have
print(type(sr1[0]))
print(type(sr1.astype("str")[0]))
print(type(sr1.astype("float")[0]))

<class 'numpy.int64'>
<class 'str'>
<class 'numpy.float64'>


In [25]:
#Between function :- used to check which values lie between first and second argument
sr1=pd.Series([1,2,3,4,30,55,32,87,99,43,5,6])
sr1

0      1
1      2
2      3
3      4
4     30
5     55
6     32
7     87
8     99
9     43
10     5
11     6
dtype: int64

In [26]:
sr1[sr1.between(10,50)]

4    30
6    32
9    43
dtype: int64

# All Strings functions can be used to extracted or modify text in a series

In [52]:
ser=pd.Series(["John","Jack","Machine Learning","Geeks for Geeks","Data Science"])
ser

0                John
1                Jack
2    Machine Learning
3     Geeks for Geeks
4        Data Science
dtype: object

In [53]:
#upper and Lower
print(ser.str.upper())
print("-"*30)
print(ser.str.lower())

0                JOHN
1                JACK
2    MACHINE LEARNING
3     GEEKS FOR GEEKS
4        DATA SCIENCE
dtype: object
------------------------------
0                john
1                jack
2    machine learning
3     geeks for geeks
4        data science
dtype: object


In [54]:
ser

0                John
1                Jack
2    Machine Learning
3     Geeks for Geeks
4        Data Science
dtype: object

In [56]:
#Length function
ser.str.len()

0     4
1     4
2    16
3    15
4    12
dtype: int64

In [57]:
#Strip functions
ser.str.strip()

0                John
1                Jack
2    Machine Learning
3     Geeks for Geeks
4        Data Science
dtype: object

In [58]:
#Split functions
ser.str.split()

0                 [John]
1                 [Jack]
2    [Machine, Learning]
3    [Geeks, for, Geeks]
4        [Data, Science]
dtype: object

In [59]:
ser.str.split()[2][1]

'Learning'

In [60]:
test=[i for i in ser.str.split()[2][1]]
test

['L', 'e', 'a', 'r', 'n', 'i', 'n', 'g']

In [61]:
#contains function
ser=pd.Series(["Geeks for Geeks","Hello@world","Machine Learning","Data@Science"])
ser[ser.str.contains("@")]

1     Hello@world
3    Data@Science
dtype: object

In [62]:
#Replace function
ser.str.replace("@"," ")

0     Geeks for Geeks
1         Hello world
2    Machine Learning
3        Data Science
dtype: object

In [65]:
#Count function
ser.str.count('e')

0    4
1    1
2    2
3    2
dtype: int64

In [66]:
ser

0     Geeks for Geeks
1         Hello@world
2    Machine Learning
3        Data@Science
dtype: object

In [72]:
#startswith and endswith
print(ser[ser.str.startswith("Geeks")])
print(ser[ser.str.endswith("Science")])

0    Geeks for Geeks
dtype: object
3    Data@Science
dtype: object


In [77]:
#Find Function
print(ser)
ser.str.find("Machine")

0     Geeks for Geeks
1         Hello@world
2    Machine Learning
3        Data@Science
dtype: object


0   -1
1   -1
2    0
3   -1
dtype: int64

In [81]:
#How to convert a series into a list
print(type(ser))
ser=ser.to_list()
print(ser)
print(type(ser))

<class 'pandas.core.series.Series'>
['Geeks for Geeks', 'Hello@world', 'Machine Learning', 'Data@Science']
<class 'list'>
