<a href="https://colab.research.google.com/github/maushamkumar/Pandas/blob/main/Series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# What is Pandas?
Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool, built on top of the Python programming language.

# Pandas Series
A Pandas Series is like a column in a table. It is a one-dimensional array holding data of any type.

# Importing Pandas

In [None]:
import numpy as np
import pandas as pd

# Series from lists

In [None]:
# String
country = ['India', 'Pakistan', 'USA', 'Nepal', 'Srilanka']
pd.Series(country)
# Whenever you create a series, values and index
# In pandas object also know as string in Pandas


0       India
1    Pakistan
2         USA
3       Nepal
4    Srilanka
dtype: object

In [None]:
# Integers
runs = [13, 24, 56, 78, 100]

In [None]:
pd.Series(runs)

0     13
1     24
2     56
3     78
4    100
dtype: int64

In [None]:
# Coustom index
marks = [67, 57, 89, 90, 100]
subjects = ['English', 'Maths', 'Science', 'Social', 'Hindi']
pd.Series(marks, index=subjects) # marks will assign as value and subjects as index

English     67
Maths       57
Science     89
Social      90
Hindi      100
dtype: int64

In [None]:
# Setting a name for the series
marks = [67, 57, 89, 90, 100]
subjects = ['English', 'Maths', 'Science', 'Social', 'Hindi']
pd.Series(marks, index=subjects, name='Mausham ke Marks')
# Marks is a reference variable where Mausham ke Marks is an attribute of that object.

English     67
Maths       57
Science     89
Social      90
Hindi      100
Name: Mausham ke Marks, dtype: int64

In [None]:
marks = pd.Series(marks, index=subjects, name='Marks')

In [None]:
marks

English     67
Maths       57
Science     89
Social      90
Hindi      100
Name: Marks, dtype: int64

# Series from dict

In [None]:
marks1 = {'English': 67, 'Maths': 57, 'Science': 89, 'Social': 90, 'Hindi': 100}
pd.Series(marks1)
# IF you pass dict in series then the key will become the index and the value will become values

English     67
Maths       57
Science     89
Social      90
Hindi      100
dtype: int64

# Series Attributes

In [None]:
# Size of the series => number of items present inside your series
marks.size

5

In [None]:
# dtype => data type of series
marks.dtype

dtype('int64')

In [None]:
# name = => name of the series
marks.name

'Marks'

In [None]:
# is_unique => check if the index is unique
marks.is_unique

True

In [None]:
pd.Series([1, 2,3 ,4, 55, 5, 6, 4, 3, 2, 2,1 ]).is_unique

False

In [None]:
# Index => index of the series
marks.index

Index(['English', 'Maths', 'Science', 'Social', 'Hindi'], dtype='object')

In [None]:
# values => values of the series
marks.values

array([ 67,  57,  89,  90, 100])

# Series using read_csv

In [None]:
# with one col
ipl = pd.read_csv('/content/IPLcsv.csv')

In [None]:
df = pd.read_csv('/content/subs.csv', header=None).squeeze()


In [None]:
type(df)

In [None]:
df

0      Subscribers gained
1                      48
2                      57
3                      40
4                      43
              ...        
361                   231
362                   226
363                   155
364                   144
365                   172
Name: 0, Length: 366, dtype: object

# Series methods

In [None]:
# Head  => First Five rows and you can specify any number of rows
print(df.head())
print()
print()
df.head(10)

0    Subscribers gained
1                    48
2                    57
3                    40
4                    43
Name: 0, dtype: object




0    Subscribers gained
1                    48
2                    57
3                    40
4                    43
5                    44
6                    46
7                    33
8                    40
9                    44
Name: 0, dtype: object

In [None]:
# Tail => Last 5 rows and you can specify any number of rows
print(df.tail())
print()
print()
df.tail(2)

361    231
362    226
363    155
364    144
365    172
Name: 0, dtype: object




364    144
365    172
Name: 0, dtype: object

In [None]:
# Sample => Random show you one row and you can specify any number of rows
df.sample(5)

285    269
162    123
178    152
252    182
154    117
Name: 0, dtype: object

In [None]:
# Value_counts => value_counts()
ipl.value_counts()

ID       innings  overs  ballnumber  batter            bowler             non-striker       extra_type  batsman_run  extras_run  total_run  non_boundary  isWicketDelivery  player_out        kind     fielders_involved  BattingTeam                
336010   1        8      6           DJ Hussey         A Kumble           T Taibu           legbyes     0            1           1          0             1                 DJ Hussey         run out  DW Steyn           Kolkata Knight Riders          1
1136573  2        0      6           JJ Roy            PP Chawla          G Gambhir         wides       0            1           1          0             1                 JJ Roy            stumped  KD Karthik         Delhi Daredevils               1
829803   1        5      5           DA Miller         YS Chahal          GJ Bailey         wides       0            1           1          0             1                 DA Miller         stumped  KD Karthik         Kings XI Punjab                1
8

In [None]:
# sort_values => inplace => ascending
df.sort_values(by = 'count', inplace = True, ascending = False)
df

TypeError: Series.sort_values() got an unexpected keyword argument 'by'

# Some Important Series Methods
* astype
* between
* clip
* drop_duplicates
* isnull
* dropna
* fillna
* isin
* apply
* copy

In [None]:
# astype => convert datatype
df['count'] = df['count'].astype('int64')
df

KeyError: 'count'

In [None]:
df.dtypes

dtype('O')

In [None]:
ipl = pd.read_csv('/content/IPLcsv.csv')

In [None]:
# atype -> change the type of data and the reduce the memory power

In [None]:
# Between => range
df.between(50, 100)


TypeError: '>=' not supported between instances of 'str' and 'int'

In [None]:
# Clip => clip the data in given range
df.clip(50, 100) # lower than 50 will be 50 and higher than 100 will be 100

TypeError: '<=' not supported between instances of 'str' and 'int'

In [None]:
# drop_duplicates => drop the duplicate values (keep the first one) this will delete the first one
temp = pd.Series([1,1,2,2,3,3,4,4])
temp.drop_duplicates()

0    1
2    2
4    3
6    4
dtype: int64

In [None]:
# duplicated => check the duplicate values
temp.duplicated().sum()

4

In [None]:
# isnull => check the null values
temp1 = pd.Series([1, 2, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10])
temp1.isnull()

0    False
1    False
2    False
3     True
4    False
5    False
6     True
7    False
8     True
9    False
dtype: bool

In [None]:
# dropna => drop the null values
temp1.dropna()

0     1.0
1     2.0
2     3.0
4     5.0
5     6.0
7     8.0
9    10.0
dtype: float64

In [None]:
# isnull => check the null values
temp1.isnull().sum()

3

In [None]:
# fillna => fill the null values
temp1.fillna(0)

0     1.0
1     2.0
2     3.0
3     0.0
4     5.0
5     6.0
6     0.0
7     8.0
8     0.0
9    10.0
dtype: float64

In [None]:
temp1.fillna(temp1.mean())

0     1.0
1     2.0
2     3.0
3     5.0
4     5.0
5     6.0
6     5.0
7     8.0
8     5.0
9    10.0
dtype: float64

In [None]:
# isin = => check the values
df.isin([67, 49, 43])

0      False
1      False
2      False
3      False
4      False
       ...  
361    False
362    False
363    False
364    False
365    False
Name: 0, Length: 366, dtype: bool

In [None]:
# apply => apply the function
df.apply(len)

0      18
1       2
2       2
3       2
4       2
       ..
361     3
362     3
363     3
364     3
365     3
Name: 0, Length: 366, dtype: int64

In [None]:
# Copy => copy the data
df.copy()

0      Subscribers gained
1                      48
2                      57
3                      40
4                      43
              ...        
361                   231
362                   226
363                   155
364                   144
365                   172
Name: 0, Length: 366, dtype: object