<a href="https://colab.research.google.com/github/solverharsh/Learning_Pandas/blob/main/pandas_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pandas
* Pandas is a fast, powerful, flexible, and easy-to-use open-source data analysis and manipulation tool, built on top of the Python programming language.

# Pandas Series
* A Pandas Series is like a column in a table. It is a 1-D array holding data of any type.

# Importing Pandas

In [None]:
import numpy as np
import pandas as pd

# Series from lists

In [None]:
# a Series built from a list of strings (default 0..n-1 index)
country = [
    'India',
    'Pakistan',
    'Nepal',
    'Uk',
    'Australia',
]
pd.Series(data=country)

Unnamed: 0,0
0,India
1,Pakistan
2,Nepal
3,Uk
4,Australia


In [None]:
# a Series built from a list of integers
runs = [13, 24, 56, 78, 100]
pd.Series(data=runs)

Unnamed: 0,0
0,13
1,24
2,56
3,78
4,100


In [None]:
# a Series whose index labels are supplied explicitly instead of 0..n-1
subjects = ['maths', 'english', 'science', 'hindi']
marks = [67, 57, 89, 100]

pd.Series(data=marks, index=subjects)

Unnamed: 0,0
maths,67
english,57
science,89
hindi,100


In [None]:
# give the Series a name through the `name` argument
marks = pd.Series(data=marks, index=subjects, name='Harsh ke marks')
marks

Unnamed: 0,Harsh ke marks
maths,67
english,57
science,89
hindi,100


# Series from dict

In [None]:
# dict keys become the index labels, dict values become the data
marks = dict(maths=67, english=57, science=89, hindi=100)

marks_series = pd.Series(data=marks, name='Harsh ke marks')
marks_series

Unnamed: 0,Harsh ke marks
maths,67
english,57
science,89
hindi,100


# Series Attributes

In [None]:
# size: the number of elements held in the Series
marks_series.size

4

In [None]:
# dtype: the data type of the values stored in the Series
marks_series.dtype

dtype('int64')

In [None]:
# name: the label that was passed as `name=` when the Series was created
marks_series.name

'Harsh ke marks'

In [None]:
# is_unique: True when no value in the Series is repeated
marks_series.is_unique

True

In [None]:
# counter-example: 3 and 4 each appear twice, so is_unique is False
pd.Series([1,2,3,4,4,3]).is_unique

False

In [None]:
# index: the labels attached to the values of the Series
marks_series.index

Index(['maths', 'english', 'science', 'hindi'], dtype='object')

In [None]:
# values: the underlying data, returned as a NumPy array
marks_series.values

array([ 67,  57,  89, 100])

# Series using read_csv

In [None]:
# with one column
# read_csv always returns a DataFrame, even for a single-column file;
# squeeze('columns') collapses that lone column into a Series.
subs = pd.read_csv('/content/subs.csv').squeeze('columns')
subs

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44
...,...
360,231
361,226
362,155
363,144


In [None]:
# inspect which pandas type the read_csv load produced
print(type(subs))

<class 'pandas.core.frame.DataFrame'>


In [None]:
# with 2 cols
# 'match_no' is used as the index, which leaves a single data column ('runs');
# squeeze('columns') turns that one-column DataFrame into a Series.
vk = pd.read_csv('/content/kohli_ipl.csv',index_col='match_no').squeeze('columns')
vk

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
1,1
2,23
3,13
4,12
5,1
...,...
211,0
212,20
213,73
214,25


In [None]:
# head: with no argument, shows the first 5 records
subs.head()

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44


In [None]:
# tail: with no argument, shows the last 5 records
subs.tail()

Unnamed: 0,Subscribers gained
360,231
361,226
362,155
363,144
364,172


In [None]:
# first 5 matches
vk.head()

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
1,1
2,23
3,13
4,12
5,1


In [None]:
# last 5 matches
vk.tail()

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
211,0
212,20
213,73
214,25
215,7


In [None]:
# head(n): only the first n records (here n = 3)
subs.head(3)

Unnamed: 0,Subscribers gained
0,48
1,57
2,40


In [None]:
# tail(n): only the last n records (here n = 5)
vk.tail(5)

Unnamed: 0_level_0,runs
match_no,Unnamed: 1_level_1
211,0
212,20
213,73
214,25
215,7


In [None]:
# 'movie' is used as the index, which leaves a single data column ('lead');
# squeeze('columns') turns that one-column DataFrame into a Series.
movies = pd.read_csv('/content/bollywood.csv',index_col='movie').squeeze('columns')
movies

Unnamed: 0_level_0,lead
movie,Unnamed: 1_level_1
Uri: The Surgical Strike,Vicky Kaushal
Battalion 609,Vicky Ahuja
The Accidental Prime Minister (film),Anupam Kher
Why Cheat India,Emraan Hashmi
Evening Shadows,Mona Ambegaonkar
...,...
Hum Tumhare Hain Sanam,Shah Rukh Khan
Aankhen (2002 film),Amitabh Bachchan
Saathiya (film),Vivek Oberoi
Company (film),Ajay Devgn


In [None]:
# sample: 5 randomly picked records; no random_state is passed,
# so the selection differs from run to run
movies.sample(5)

Unnamed: 0_level_0,lead
movie,Unnamed: 1_level_1
Sui Dhaaga,Anushka Sharma
Bubble Gum (film),Apoorva Arora
Satyameva Jayate (2018 film),John Abraham
The Sky Is Pink,Zaira Wasim
Fareb (2005 film),Shilpa Shetty Kundra


In [None]:
# value_counts: how many times each distinct value occurs in `movies`,
# listed from most frequent to least frequent
movies.value_counts()

Unnamed: 0_level_0,count
lead,Unnamed: 1_level_1
Akshay Kumar,48
Amitabh Bachchan,45
Ajay Devgn,38
Salman Khan,31
Sanjay Dutt,26
...,...
Naveen Bawa,1
Naushaad Abbas,1
Nauheed Cyrusi,1
Natalia Janoszek,1


In [None]:
import pandas as pd

# a small Series that contains repeated values
data = pd.Series(data=[1, 2, 2, 3, 3, 3, 4])

# tally the occurrences of every distinct value
counts = data.value_counts()

print(counts)


3    3
2    2
1    1
4    1
Name: count, dtype: int64


In [None]:
# sort_values: returns a sorted copy (ascending by default);
# without inplace=True, marks_series itself keeps its original order
marks_series.sort_values()

Unnamed: 0,Harsh ke marks
english,57
maths,67
science,89
hindi,100


In [None]:
# inplace=True reorders marks_series itself (descending here) instead of
# producing a new Series, which is why this cell displays nothing
marks_series.sort_values(ascending=False,inplace = True)

In [None]:
# display marks_series to see the effect of the in-place sort
marks_series

Unnamed: 0,Harsh ke marks
hindi,100
science,89
maths,67
english,57


In [None]:
# count: number of non-missing values
vk.count()

Unnamed: 0,0
runs,215


`Example 1` : **Creating a Series from a List**

In [None]:
import pandas as pd

# a plain list becomes a Series with the default integer index
data = [10, 20, 30, 40]
series = pd.Series(data=data)

print(series)


0    10
1    20
2    30
3    40
dtype: int64


`Example 2` **: Creating a Series with Custom Indices**

In [None]:
# pair each value with an explicit label via the `index` argument
indices = ['a', 'b', 'c', 'd']
data = [10, 20, 30, 40]
series = pd.Series(data=data, index=indices)

print(series)


a    10
b    20
c    30
d    40
dtype: int64


`Example 3` **: Creating a Series from a Dictionary**

In [None]:
# dictionary keys turn into index labels, dictionary values into data
data = dict(a=10, b=20, c=30, d=40)
series = pd.Series(data=data)

print(series)


a    10
b    20
c    30
d    40
dtype: int64


`Example 4` **: Creating a Series from a NumPy Array**

In [None]:
import numpy as np

# a NumPy array becomes a Series with the default integer index
data = np.array([10, 20, 30, 40])
series = pd.Series(data=data)

print(series)


0    10
1    20
2    30
3    40
dtype: int64


# Accessing Elements in a Series

`Example 1` **: Accessing Single Element by Index**


In [None]:
# Accessing a single element by index label.
# `series` was most recently rebuilt from a NumPy array, which gives it the
# default 0..3 integer index; with that index series['a'] raises KeyError on
# a fresh Restart & Run All. Re-create the labelled Series here so this
# section does not depend on out-of-order execution.
series = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
print(series['a'])


10


`Example 2` **: Accessing Multiple Elements by Index**

In [None]:
# pass a list of labels to pick several elements at once
labels = ['a', 'c']
print(series[labels])  # Output: a    10, c    30


a    10
c    30
dtype: int64


`Example 3` **: Accessing by Position (Integer Location)**

You can use .iloc[] for positional indexing.

In [None]:
# .iloc selects by integer position, regardless of the index labels
first_value = series.iloc[0]      # position 0 -> first element
middle_slice = series.iloc[1:3]   # positions 1 and 2 (end is exclusive)

print(first_value)  # Output: 10 (first element)
print(middle_slice)  # Output: b    20, c    30 (slicing)


10
b    20
c    30
dtype: int64


# Modifying Elements in a Series
* You can modify the values in a Series just like a dictionary.

Example 1: Changing a Single Element

In [None]:
# Changing an element: assigning to a label overwrites that value
# directly in `series` (the Series is modified in place)
series['a'] = 100
print(series)


a    100
b     20
c     30
d     40
dtype: int64


Example 2: Changing Multiple Elements

In [None]:
# Changing multiple elements: a list of labels on the left is matched
# position-by-position with the list of new values on the right
series[['b', 'd']] = [200, 400]
print(series)


a    100
b    200
c     30
d    400
dtype: int64


# Basic Operations on Series

Example 1: Basic Arithmetic Operations

In [None]:
# arithmetic with a scalar is applied to every element of the Series
series = pd.Series(data=[1, 2, 3, 4])

plus_ten = series + 10   # add 10 to each element
doubled = series * 2     # multiply each element by 2

print(plus_ten)
print(doubled)


0    11
1    12
2    13
3    14
dtype: int64
0    2
1    4
2    6
3    8
dtype: int64


Example 2: Statistical Operations

In [None]:
# Basic statistics on Series: each method reduces the whole Series
# to a single number
print(series.mean())  # Output: 2.5 (mean)
print(series.sum())   # Output: 10 (sum)
print(series.max())   # Output: 4 (max)
print(series.min())   # Output: 1 (min)


2.5
10
4
1


# Handling Missing Data (NaN) in a Series
* A Series can contain missing values represented by NaN. Pandas provides several functions for handling NaN values.

Example 1: Creating a Series with NaN

In [None]:
# np.nan marks the missing entry; note the printed dtype is float64
import numpy as np
values = [1, 2, np.nan, 4]
series_with_nan = pd.Series(data=values)

print(series_with_nan)


0    1.0
1    2.0
2    NaN
3    4.0
dtype: float64


Example 2: Checking for NaN values

In [None]:
# Checking for NaN values: isna() gives a boolean Series that is True
# exactly where a value is missing
print(series_with_nan.isna())


0    False
1    False
2     True
3    False
dtype: bool


Example 3: Dropping NaN values

In [None]:
# Dropping NaN values: dropna() returns a new Series without the missing
# entries; the surviving values keep their original index labels
cleaned_series = series_with_nan.dropna()
print(cleaned_series)

0    1.0
1    2.0
3    4.0
dtype: float64


# Applying Functions to a Series
* You can apply functions element-wise to a Series using methods like `.apply()`.

In [None]:
# Applying a function to each element: apply() calls the lambda once per
# value and collects the results into a new Series
squared_series = series.apply(lambda x: x ** 2)
print(squared_series)


0     1
1     4
2     9
3    16
dtype: int64


# Summary:
* A Series is a one-dimensional array in Pandas with labels (indices).

* You can create a Series from a list, dictionary, or NumPy array.

* Access elements using either index labels or integer positions.

* Perform mathematical and statistical operations on a Series.

* Handle missing data using NaN and functions like .isna() and .dropna().

* You can apply custom functions to each element of a Series.