#### Pandas-DataFrame And Series
Pandas is a powerful data manipulation library in Python, widely used for data analysis and data cleaning. It provides two primary data structures: Series and DataFrame. A Series is a one-dimensional array-like object, while a DataFrame is a two-dimensional, size-mutable, and potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [1]:
import pandas as pd

In [3]:
## Series
## A pandas Series is a one-dimensional, array-like object that can hold
## values of any data type. It is comparable to a single column of a table.

import pandas as pd

data = [1, 2, 3, 4, 5]
series = pd.Series(data)  # no index passed, so pandas assigns 0..4 (see output)

# Show the Series itself, then the Python type of the object.
print(series, type(series), sep="\n")

0    1
1    2
2    3
3    4
4    5
dtype: int64
<class 'pandas.core.series.Series'>


In [4]:
## Create a Series from a dictionary:
## the keys become the index labels and the values become the data.

data = dict(a=1, b=2, c=3)
series_dict = pd.Series(data)
print(series_dict)

a    1
b    2
c    3
dtype: int64


In [5]:
# Create a Series with custom index labels: each label in `index`
# is paired with the value at the same position in `data`.
data = [10, 20, 30]
index = ["a", "b", "c"]
pd.Series(data=data, index=index)

a    10
b    20
c    30
dtype: int64

In [2]:
## DataFrame
## Create a DataFrame from a dictionary of lists:
## each key becomes a column name and each list holds that column's values.

import pandas as pd

data = dict(
    Name=["Tamas", "John", "Jack"],
    Age=[26, 30, 45],
    City=["Gabcikovo", "New York", "Chicago"],
)

df = pd.DataFrame(data)

# Show the table, then the Python type of the object.
print(df, type(df), sep="\n")

    Name  Age       City
0  Tamas   26  Gabcikovo
1   John   30   New York
2   Jack   45    Chicago
<class 'pandas.core.frame.DataFrame'>


In [3]:
import numpy as np

# Convert the DataFrame to a NumPy array. The columns mix strings and
# integers, so the resulting array has dtype=object (see the output).
np.array(df)

array([['Tamas', 26, 'Gabcikovo'],
       ['John', 30, 'New York'],
       ['Jack', 45, 'Chicago']], dtype=object)

In [4]:
## Create a DataFrame from a list of dictionaries:
## each dictionary is one row, and its keys name the columns.

import pandas as pd

data = [
    dict(Name="Tamas", Age=26, City="Gabcikovo"),
    dict(Name="John", Age=22, City="New York"),
    dict(Name="Jack", Age=26, City="Gabcikovo"),
    dict(Name="Tamas", Age=26, City="Gabcikovo"),
]

df = pd.DataFrame(data)

# Show the table, then the Python type of the object.
print(df, type(df), sep="\n")


    Name  Age       City
0  Tamas   26  Gabcikovo
1   John   22   New York
2   Jack   26  Gabcikovo
3  Tamas   26  Gabcikovo
<class 'pandas.core.frame.DataFrame'>


In [6]:
# Load the CSV file into a DataFrame and print its first five rows
# (head() defaults to n=5).
df = pd.read_csv("data.csv")
print(df.head())

         Date Category  Value   Product  Sales Region
0  2023-01-01        A   28.0  Product1  754.0   East
1  2023-01-02        B   39.0  Product3  110.0  North
2  2023-01-03        C   32.0  Product2  398.0   East
3  2023-01-04        B    8.0  Product1  522.0   East
4  2023-01-05        B   26.0  Product3  869.0  North


In [None]:
# Reload the CSV and display the first five rows (head() defaults to n=5).
df = pd.read_csv("data.csv")
df.head()

Unnamed: 0,Date,Category,Value,Product,Sales,Region
0,2023-01-01,A,28.0,Product1,754.0,East
1,2023-01-02,B,39.0,Product3,110.0,North
2,2023-01-03,C,32.0,Product2,398.0,East
3,2023-01-04,B,8.0,Product1,522.0,East
4,2023-01-05,B,26.0,Product3,869.0,North


In [None]:
# Display the last five rows (tail() defaults to n=5).
df.tail()

Unnamed: 0,Date,Category,Value,Product,Sales,Region
45,2023-02-15,B,99.0,Product2,599.0,West
46,2023-02-16,B,6.0,Product1,938.0,South
47,2023-02-17,B,69.0,Product3,143.0,West
48,2023-02-18,C,65.0,Product3,182.0,North
49,2023-02-19,C,11.0,Product3,708.0,North


In [56]:
### Accessing Data from a DataFrame
### Build a small example frame, df_1, that the following cells index into.

import pandas as pd

data = dict(
    Name=["Tamas", "John", "Jack"],
    Age=[26, 30, 45],
    City=["Gabcikovo", "New York", "Chicago"],
)

df_1 = pd.DataFrame(data)
df_1

Unnamed: 0,Name,Age,City
0,Tamas,26,Gabcikovo
1,John,30,New York
2,Jack,45,Chicago


In [6]:
# Selecting a single column by its label yields a Series, not a DataFrame.
print(type(df_1["Name"]))
df_1["Name"]

<class 'pandas.core.series.Series'>


0    Tamas
1     John
2     Jack
Name: Name, dtype: object

In [11]:
# .loc selects by index label: label 0 returns that row as a Series
# whose index is the column names (see the output).
df_1.loc[0]

Name        Tamas
Age            26
City    Gabcikovo
Name: 0, dtype: object

In [15]:
# Read one cell by integer position (row 0, column 2) with a single .iloc call.
# The chained form df_1.iloc[0][2] first builds a row Series labeled by column
# name and then indexes it with an integer; pandas deprecates treating that
# integer as a position and emits a FutureWarning.
df_1.iloc[0, 2]

  df_1.iloc[0][2]


'Gabcikovo'

In [19]:
# Show df_1 again as a reference for the element lookups that follow.
df_1

Unnamed: 0,Name,Age,City
0,Tamas,26,Gabcikovo
1,John,30,New York
2,Jack,45,Chicago


In [17]:
## Accessing a Specified Element
## .at looks up a single value by row label (1) and column label ("Age").

df_1.at[1,"Age"]

np.int64(30)

In [20]:
# Same label-based lookup with .at, this time in the "Name" column.
df_1.at[1,"Name"]

'John'

In [21]:
## Accessing a specified element using iat
## .iat looks up a single value by integer position: row 2, column 2.

df_1.iat[2,2]

'Chicago'

In [24]:
### Data Manipulation with DataFrames
### Starting point: df_1 before the column and row changes below.

df_1

Unnamed: 0,Name,Age,City
0,Tamas,26,Gabcikovo
1,John,30,New York
2,Jack,45,Chicago


In [40]:
df_1["Salary"] = [50000, 60000, 70000] # Add a new column: one value per existing row
df_1

Unnamed: 0,Name,Age,City,Salary
0,Tamas,26,Gabcikovo,50000
1,John,30,New York,60000
2,Jack,45,Chicago,70000


In [41]:
## Remove a column

# Temporary: drop() returns a new DataFrame without "Salary";
# df_1 itself still contains the column.
df_1.drop(columns="Salary")

Unnamed: 0,Name,Age,City
0,Tamas,26,Gabcikovo
1,John,30,New York
2,Jack,45,Chicago


In [42]:
# Permanent: rebind df_1 to the frame returned by drop() instead of
# mutating it with inplace=True.
df_1 = df_1.drop(columns="Salary")

In [43]:
# Confirm that the "Salary" column is gone from df_1 itself.
df_1

Unnamed: 0,Name,Age,City
0,Tamas,26,Gabcikovo
1,John,30,New York
2,Jack,45,Chicago


In [57]:
## Increase every value in the "Age" column by 1
## (note: re-running this cell increments the ages again)

df_1["Age"] = df_1["Age"].add(1)
df_1

Unnamed: 0,Name,Age,City
0,Tamas,27,Gabcikovo
1,John,31,New York
2,Jack,46,Chicago


In [None]:
# Temporary: returns a new DataFrame without the row labeled 0;
# df_1 itself still contains that row.
df_1.drop(index=0)

Unnamed: 0,Name,Age,City
1,John,31,New York
2,Jack,46,Chicago


In [58]:
# Permanent: rebind df_1 to the frame returned by drop() instead of
# mutating it with inplace=True. The row labeled 0 is removed.
df_1 = df_1.drop(index=0)

In [59]:
# Confirm that the row labeled 0 has been removed from df_1 itself.
df_1

Unnamed: 0,Name,Age,City
1,John,31,New York
2,Jack,46,Chicago


In [60]:
df = pd.read_csv("data.csv")

# Data type pandas assigned to each column
print("Data types:\n", df.dtypes)

# Summary statistics (count, mean, std, min, quartiles, max);
# the output below covers the numeric columns
print("Statistical summary:\n", df.describe())

Data types:
 Date         object
Category     object
Value       float64
Product      object
Sales       float64
Region       object
dtype: object
Statistical summary:
            Value       Sales
count  47.000000   46.000000
mean   51.744681  557.130435
std    29.050532  274.598584
min     2.000000  108.000000
25%    27.500000  339.000000
50%    54.000000  591.500000
75%    70.000000  767.500000
max    99.000000  992.000000
