In [3]:
# To load the pandas package and start working with it, import the package.
import pandas as pd
import numpy as np

In [5]:
# Manually store data in a table create a DataFrame.
# A DataFrame is a 2-dimensional data structure that can store data of different types (including characters, integers, floating point values, categorical data and more) in columns.
df = pd.DataFrame({
    "Name" : ["Shivam Sing",
              "Vivek Panday",
              "Raju Shrivastava",
              "Shreya Singh"],
     "Age" : [25,40,30,35],
    "Sex" : ["Male","Male","Male","Female"]              

})

In [7]:
df

Unnamed: 0,Name,Age,Sex
0,Shivam Sing,25,Male
1,Vivek Panday,40,Male
2,Raju Shrivastava,30,Male
3,Shreya Singh,35,Female


Data Selection

In [10]:
# Data Selection

# 1 Getitem ([]) -- For a DataFrame, passing a single label selects a columns

df["Name"]

0         Shivam Sing
1        Vivek Panday
2    Raju Shrivastava
3        Shreya Singh
Name: Name, dtype: object

In [12]:
# I’m just interested in working with the data in the column Age
# When selecting a single column of a pandas DataFrame, the result is a pandas Series. To select the column, use the column label in between square brackets [].
df["Name"]

0         Shivam Sing
1        Vivek Panday
2    Raju Shrivastava
3        Shreya Singh
Name: Name, dtype: object

In [42]:
# it is used to select the first row of a Pandas DataFrame.
df[0:1]

Unnamed: 0,Name,Age,Sex
0,Shivam Sing,25,Male


In [16]:
df[0:2]

Unnamed: 0,Name,Age,Sex
0,Shivam Sing,25,Male
1,Vivek Panday,40,Male


In [18]:
# 2  Selection by label
# See more in Selection by Label using DataFrame.loc() or DataFrame.at().
df.loc[1]

Name    Vivek Panday
Age               40
Sex             Male
Name: 1, dtype: object

In [55]:
# retrieves the row where the index is 1.
df.loc[1]

Name    Vivek Panday
Age               40
Sex             Male
Name: 1, dtype: object

In [20]:
df.loc[3]

Name    Shreya Singh
Age               35
Sex           Female
Name: 3, dtype: object

In [22]:
print(df.loc[:, "Name"]) 

0         Shivam Sing
1        Vivek Panday
2    Raju Shrivastava
3        Shreya Singh
Name: Name, dtype: object


In [24]:
print(df.loc[df["Name"] == "Vivek Panday"])

           Name  Age   Sex
1  Vivek Panday   40  Male


In [26]:
# 3  Selection by position
# See more in Selection by Position using DataFrame.iloc() or DataFrame.iat().

# Select via the position of the passed integers:

df.iloc[3]

Name    Shreya Singh
Age               35
Sex           Female
Name: 3, dtype: object

In [28]:
df.iloc[1:3, 0:2]

Unnamed: 0,Name,Age
1,Vivek Panday,40
2,Raju Shrivastava,30


In [29]:
# Boolean indexing
# Select rows where df.A is greater than 0
df[df["Age"] > 0]

Unnamed: 0,Name,Age,Sex
0,Shivam Sing,25,Male
1,Vivek Panday,40,Male
2,Raju Shrivastava,30,Male
3,Shreya Singh,35,Female


In [32]:
df[df["Age"]> 25]

Unnamed: 0,Name,Age,Sex
1,Vivek Panday,40,Male
2,Raju Shrivastava,30,Male
3,Shreya Singh,35,Female


In [32]:

# Adding a new column "Salary"
df["Salary"] = [50000, 60000, 55000, 70000]
df["Country"] = "India"  # Assigns "India" to all rows


In [34]:
df

Unnamed: 0,Name,Age,Sex,Salary,Country
0,Shivam Sing,25,Male,50000,India
1,Vivek Panday,40,Male,60000,India
2,Raju Shrivastava,30,Male,55000,India
3,Shreya Singh,35,Female,70000,India


In [36]:
# I want to know the maximum Age
df["Age"].max()

40

In [38]:
# I want to know the minmum Age
df["Age"].min()

25

In [40]:
# some basic statistics of the numerical data of my data table
df["Age"].describe()

count     4.000000
mean     32.500000
std       6.454972
min      25.000000
25%      28.750000
50%      32.500000
75%      36.250000
max      40.000000
Name: Age, dtype: float64

In [42]:
df.count()

Name       4
Age        4
Sex        4
Salary     4
Country    4
dtype: int64

In [44]:
df.describe()

Unnamed: 0,Age,Salary
count,4.0,4.0
mean,32.5,58750.0
std,6.454972,8539.125638
min,25.0,50000.0
25%,28.75,53750.0
50%,32.5,57500.0
75%,36.25,62500.0
max,40.0,70000.0


In [69]:
# Reading or Importing a CSV file into a pandas DataFrame
# read_file = pd.read_csv(r"C:\Users\HP\Downloads\sample_data.csv")
load_data = pd.read_csv("C:/Users/HP/Downloads/sampledata.csv")
load_data


Unnamed: 0,First Name,Last Name,Age,City,Country,Email,Phone,Salary
0,John,Doe,25.0,New York,USA,john.doe@example.com,123-456-7890,50000.0
1,Alice,Smith,30.0,Los Angeles,USA,alice.smith@example.com,987-654-3210,60000.0
2,Bob,Johnson,28.0,Chicago,USA,,555-666-7777,55000.0
3,John,Doe,25.0,New York,USA,john.doe@example.com,,50000.0
4,Charlie,Brown,,Houston,USA,charlie.b@example.com,222-333-4444,52000.0
5,Emily,Davis,35.0,Phoenix,USA,,111-222-3333,
6,Frank,Miller,40.0,,USA,frank.m@example.com,999-888-7777,70000.0
7,Grace,Wilson,27.0,San Diego,USA,grace.w@example.com,,62000.0
8,Hank,Taylor,,Dallas,USA,hank.t@example.com,777-888-9999,58000.0
9,Ivy,Martinez,29.0,San Jose,USA,,666-555-4444,61000.0


working with missing Data

In [51]:
# working with missing Data
# In pandas, missing data is represented by NaN (Not a Number), which comes from NumPy (np.nan). 
# By default, pandas ignores NaN values in computations but provides various methods to handle them.
# Reindexing allows you to change/add/delete the index on a specified axis. This returns a copy of the data:

In [71]:
load_data.isnull().sum()

First Name    0
Last Name     0
Age           4
City          3
Country       0
Email         7
Phone         4
Salary        3
dtype: int64

In [73]:
load_data.isna()

Unnamed: 0,First Name,Last Name,Age,City,Country,Email,Phone,Salary
0,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False
2,False,False,False,False,False,True,False,False
3,False,False,False,False,False,False,True,False
4,False,False,True,False,False,False,False,False
5,False,False,False,False,False,True,False,True
6,False,False,False,True,False,False,False,False
7,False,False,False,False,False,False,True,False
8,False,False,True,False,False,False,False,False
9,False,False,False,False,False,True,False,False


In [77]:
load_data.fillna(27)

Unnamed: 0,First Name,Last Name,Age,City,Country,Email,Phone,Salary
0,John,Doe,25.0,New York,USA,john.doe@example.com,123-456-7890,50000.0
1,Alice,Smith,30.0,Los Angeles,USA,alice.smith@example.com,987-654-3210,60000.0
2,Bob,Johnson,28.0,Chicago,USA,27,555-666-7777,55000.0
3,John,Doe,25.0,New York,USA,john.doe@example.com,27,50000.0
4,Charlie,Brown,27.0,Houston,USA,charlie.b@example.com,222-333-4444,52000.0
5,Emily,Davis,35.0,Phoenix,USA,27,111-222-3333,27.0
6,Frank,Miller,40.0,27,USA,frank.m@example.com,999-888-7777,70000.0
7,Grace,Wilson,27.0,San Diego,USA,grace.w@example.com,27,62000.0
8,Hank,Taylor,27.0,Dallas,USA,hank.t@example.com,777-888-9999,58000.0
9,Ivy,Martinez,29.0,San Jose,USA,27,666-555-4444,61000.0


In [79]:
load_data["Age"].fillna(method='ffill')


  load_data["Age"].fillna(method='ffill')


0     25.0
1     30.0
2     28.0
3     25.0
4     25.0
5     35.0
6     40.0
7     27.0
8     27.0
9     29.0
10    25.0
11    30.0
12    28.0
13    25.0
14    25.0
15    35.0
16    40.0
17    27.0
18    27.0
19    29.0
20    33.0
21    31.0
22    37.0
Name: Age, dtype: float64

In [81]:
load_data.fillna(method='bfill')

  load_data.fillna(method='bfill')


Unnamed: 0,First Name,Last Name,Age,City,Country,Email,Phone,Salary
0,John,Doe,25.0,New York,USA,john.doe@example.com,123-456-7890,50000.0
1,Alice,Smith,30.0,Los Angeles,USA,alice.smith@example.com,987-654-3210,60000.0
2,Bob,Johnson,28.0,Chicago,USA,john.doe@example.com,555-666-7777,55000.0
3,John,Doe,25.0,New York,USA,john.doe@example.com,222-333-4444,50000.0
4,Charlie,Brown,35.0,Houston,USA,charlie.b@example.com,222-333-4444,52000.0
5,Emily,Davis,35.0,Phoenix,USA,frank.m@example.com,111-222-3333,70000.0
6,Frank,Miller,40.0,San Diego,USA,frank.m@example.com,999-888-7777,70000.0
7,Grace,Wilson,27.0,San Diego,USA,grace.w@example.com,777-888-9999,62000.0
8,Hank,Taylor,29.0,Dallas,USA,hank.t@example.com,777-888-9999,58000.0
9,Ivy,Martinez,29.0,San Jose,USA,john.doe@example.com,666-555-4444,61000.0


In [83]:
load_data.interpolate()

  load_data.interpolate()


Unnamed: 0,First Name,Last Name,Age,City,Country,Email,Phone,Salary
0,John,Doe,25.0,New York,USA,john.doe@example.com,123-456-7890,50000.0
1,Alice,Smith,30.0,Los Angeles,USA,alice.smith@example.com,987-654-3210,60000.0
2,Bob,Johnson,28.0,Chicago,USA,,555-666-7777,55000.0
3,John,Doe,25.0,New York,USA,john.doe@example.com,,50000.0
4,Charlie,Brown,30.0,Houston,USA,charlie.b@example.com,222-333-4444,52000.0
5,Emily,Davis,35.0,Phoenix,USA,,111-222-3333,61000.0
6,Frank,Miller,40.0,,USA,frank.m@example.com,999-888-7777,70000.0
7,Grace,Wilson,27.0,San Diego,USA,grace.w@example.com,,62000.0
8,Hank,Taylor,28.0,Dallas,USA,hank.t@example.com,777-888-9999,58000.0
9,Ivy,Martinez,29.0,San Jose,USA,,666-555-4444,61000.0


In [55]:
pd.isna

<function pandas.core.dtypes.missing.isna(obj: 'object') -> 'bool | npt.NDArray[np.bool_] | NDFrame'>

In [57]:
# retrive all the column from the datafream
df.columns

Index(['Name', 'Age', 'Sex', 'Salary', 'Country'], dtype='object')

In [59]:
# converting data fream obj to list
df1 = list(df.columns)
df1

['Name', 'Age', 'Sex', 'Salary', 'Country']

In [63]:
Name = df['Name']

In [81]:
df1 = df.reindex(index=Name[0:5])  
df1

Unnamed: 0_level_0,Name,Age,Sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Shivam Sing,,,
Vivek Panday,,,
Raju Shrivastava,,,
Shreya Singh,,,


In [34]:
df

Unnamed: 0,Name,Age,Sex
0,Shivam Sing,25,Male
1,Vivek Panday,40,Male
2,Raju Shrivastava,30,Male
3,Shreya Singh,35,Female


In [85]:
read_file.columns

Index(['Book_ID', 'Title', 'Author', 'Genre', 'Published_Year', 'Price',
       'Stock'],
      dtype='object')

In [63]:
data = {'A': [10, 20, 30, 40], 'B': [50, 60, 70, 80]}

# Creating a DataFrame 'dfx' using the data dictionary  
# The index parameter provides custom labels for the rows of the DataFrame  
# Here, the rows are labeled as 'row1', 'row2', 'row3', and 'row4'  
dfx = pd.DataFrame(data, index=['row1', 'row2', 'row3', 'row4'])
dfx

Unnamed: 0,A,B
row1,10,50
row2,20,60
row2,30,70
row3,40,80


In [97]:
  Author = read_file['Author']
Author

0             Joseph Crane
1              Mario Moore
2           Derrick Howard
3      Christopher Andrews
4              Juan Miller
              ...         
495            James Adams
496         Craig Thompson
497          Heather Marks
498           Isaac Nelson
499         David Hatfield
Name: Author, Length: 500, dtype: object

In [101]:
df2= df.reindex(index= Author[0:5])
df2

Unnamed: 0_level_0,Name,Age,Sex
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Joseph Crane,,,
Mario Moore,,,
Derrick Howard,,,
Christopher Andrews,,,
Juan Miller,,,


In [44]:
# Reading or Importing a CSV file into a pandas DataFrame
read_file0 = pd.read_csv(r"C:\Users\HP\Downloads\sales_data.csv", usecols = ["ORDERNUMBER"])

In [46]:
read_file0

Unnamed: 0,ORDERNUMBER
0,10107
1,10121
2,10134
3,10145
4,10159
...,...
2818,10350
2819,10373
2820,10386
2821,10397


In [48]:
# observe the output there is no header 
read_file1 = pd.read_csv(r"C:\Users\HP\Downloads\sales_data.csv", header =None)

In [50]:
read_file1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
0,ORDERNUMBER,QUANTITYORDERED,PRICEEACH,ORDERLINENUMBER,SALES,ORDERDATE,STATUS,QTR_ID,MONTH_ID,YEAR_ID,...,STATE,POSTALCODE,COUNTRY,TERRITORY,CONTACTLASTNAME,CONTACTFIRSTNAME,DEALSIZE,ORDER_YEAR,ORDER_MONTH,ORDER_DAY
1,10107,30,95.7,2,2871.0,2003-02-24,Shipped,1,2,2003,...,NY,10022,USA,Unknown,Yu,Kwai,Small,2003,2,24
2,10121,34,81.35,5,2765.9,2003-05-07,Shipped,2,5,2003,...,Unknown,51100,France,EMEA,Henriot,Paul,Small,2003,5,7
3,10134,41,94.74,2,3884.34,2003-07-01,Shipped,3,7,2003,...,Unknown,75508,France,EMEA,Da Cunha,Daniel,Medium,2003,7,1
4,10145,45,83.26,6,3746.7,2003-08-25,Shipped,3,8,2003,...,CA,90003,USA,Unknown,Young,Julie,Medium,2003,8,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2819,10350,20,100.0,15,2244.4,2004-12-02,Shipped,4,12,2004,...,Unknown,28034,Spain,EMEA,Freyre,Diego,Small,2004,12,2
2820,10373,29,100.0,1,3978.51,2005-01-31,Shipped,1,1,2005,...,Unknown,90110,Finland,EMEA,Koskitalo,Pirkko,Medium,2005,1,31
2821,10386,43,100.0,4,5417.57,2005-03-01,Resolved,1,3,2005,...,Unknown,28034,Spain,EMEA,Freyre,Diego,Medium,2005,3,1
2822,10397,34,62.24,1,2116.16,2005-03-28,Shipped,1,3,2005,...,Unknown,31000,France,EMEA,Roulet,Annette,Small,2005,3,28


In [11]:
# observe the output there is custom header 
read_filek2 = pd.read_csv(r"C:\Users\HP\Downloads\sales_data.csv", names = [ "q_order", "q_number", "price", "bhv", "bihg", 
    "col_5", "col_8", "col_3", "col_9", "col_2", 
    "col_45", "col_26", "col_35", "col_25", "bvib", 
    "nbjib", "col_5_dup", "col_6", "col_85", "col_25_dup", 
    "col_52", "col_65", "col_25_dup2"], header = None)

In [13]:
read_filek2

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,q_order,q_number,price,bhv,bihg,col_5,col_8,col_3,col_9,col_2,...,col_25,bvib,nbjib,col_5_dup,col_6,col_85,col_25_dup,col_52,col_65,col_25_dup2
ORDERNUMBER,QUANTITYORDERED,PRICEEACH,ORDERLINENUMBER,SALES,ORDERDATE,STATUS,QTR_ID,MONTH_ID,YEAR_ID,PRODUCTLINE,MSRP,PRODUCTCODE,CUSTOMERNAME,PHONE,...,STATE,POSTALCODE,COUNTRY,TERRITORY,CONTACTLASTNAME,CONTACTFIRSTNAME,DEALSIZE,ORDER_YEAR,ORDER_MONTH,ORDER_DAY
10107,30,95.7,2,2871.0,2003-02-24,Shipped,1,2,2003,Motorcycles,95,S10_1678,Land of Toys Inc.,2125557818,...,NY,10022,USA,Unknown,Yu,Kwai,Small,2003,2,24
10121,34,81.35,5,2765.9,2003-05-07,Shipped,2,5,2003,Motorcycles,95,S10_1678,Reims Collectables,26.47.1555,...,Unknown,51100,France,EMEA,Henriot,Paul,Small,2003,5,7
10134,41,94.74,2,3884.34,2003-07-01,Shipped,3,7,2003,Motorcycles,95,S10_1678,Lyon Souveniers,+33 1 46 62 7555,...,Unknown,75508,France,EMEA,Da Cunha,Daniel,Medium,2003,7,1
10145,45,83.26,6,3746.7,2003-08-25,Shipped,3,8,2003,Motorcycles,95,S10_1678,Toys4GrownUps.com,6265557265,...,CA,90003,USA,Unknown,Young,Julie,Medium,2003,8,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10350,20,100.0,15,2244.4,2004-12-02,Shipped,4,12,2004,Ships,54,S72_3212,Euro Shopping Channel,(91) 555 94 44,...,Unknown,28034,Spain,EMEA,Freyre,Diego,Small,2004,12,2
10373,29,100.0,1,3978.51,2005-01-31,Shipped,1,1,2005,Ships,54,S72_3212,"Oulu Toy Supplies, Inc.",981-443655,...,Unknown,90110,Finland,EMEA,Koskitalo,Pirkko,Medium,2005,1,31
10386,43,100.0,4,5417.57,2005-03-01,Resolved,1,3,2005,Ships,54,S72_3212,Euro Shopping Channel,(91) 555 94 44,...,Unknown,28034,Spain,EMEA,Freyre,Diego,Medium,2005,3,1
10397,34,62.24,1,2116.16,2005-03-28,Shipped,1,3,2005,Ships,54,S72_3212,Alpha Cognac,61.77.6555,...,Unknown,31000,France,EMEA,Roulet,Annette,Small,2005,3,28


In [15]:

# Define proper column names
column_names = [
    "q_order", "q_number", "price", "bhv", "bihg", 
    "col_5", "col_8", "col_3", "col_9", "col_2", 
    "col_45", "col_26", "col_35", "col_25", "bvib", 
    "nbjib", "col_5_dup", "col_6", "col_85", "col_25_dup", 
    "col_52", "col_65", "col_25_dup2"
]

# Read CSV with custom headers
read_file3 = pd.read_csv(r"C:\Users\HP\Downloads\sales_data.csv", names=column_names, header=None)


read_file3

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,q_order,q_number,price,bhv,bihg,col_5,col_8,col_3,col_9,col_2,...,col_25,bvib,nbjib,col_5_dup,col_6,col_85,col_25_dup,col_52,col_65,col_25_dup2
ORDERNUMBER,QUANTITYORDERED,PRICEEACH,ORDERLINENUMBER,SALES,ORDERDATE,STATUS,QTR_ID,MONTH_ID,YEAR_ID,PRODUCTLINE,MSRP,PRODUCTCODE,CUSTOMERNAME,PHONE,...,STATE,POSTALCODE,COUNTRY,TERRITORY,CONTACTLASTNAME,CONTACTFIRSTNAME,DEALSIZE,ORDER_YEAR,ORDER_MONTH,ORDER_DAY
10107,30,95.7,2,2871.0,2003-02-24,Shipped,1,2,2003,Motorcycles,95,S10_1678,Land of Toys Inc.,2125557818,...,NY,10022,USA,Unknown,Yu,Kwai,Small,2003,2,24
10121,34,81.35,5,2765.9,2003-05-07,Shipped,2,5,2003,Motorcycles,95,S10_1678,Reims Collectables,26.47.1555,...,Unknown,51100,France,EMEA,Henriot,Paul,Small,2003,5,7
10134,41,94.74,2,3884.34,2003-07-01,Shipped,3,7,2003,Motorcycles,95,S10_1678,Lyon Souveniers,+33 1 46 62 7555,...,Unknown,75508,France,EMEA,Da Cunha,Daniel,Medium,2003,7,1
10145,45,83.26,6,3746.7,2003-08-25,Shipped,3,8,2003,Motorcycles,95,S10_1678,Toys4GrownUps.com,6265557265,...,CA,90003,USA,Unknown,Young,Julie,Medium,2003,8,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10350,20,100.0,15,2244.4,2004-12-02,Shipped,4,12,2004,Ships,54,S72_3212,Euro Shopping Channel,(91) 555 94 44,...,Unknown,28034,Spain,EMEA,Freyre,Diego,Small,2004,12,2
10373,29,100.0,1,3978.51,2005-01-31,Shipped,1,1,2005,Ships,54,S72_3212,"Oulu Toy Supplies, Inc.",981-443655,...,Unknown,90110,Finland,EMEA,Koskitalo,Pirkko,Medium,2005,1,31
10386,43,100.0,4,5417.57,2005-03-01,Resolved,1,3,2005,Ships,54,S72_3212,Euro Shopping Channel,(91) 555 94 44,...,Unknown,28034,Spain,EMEA,Freyre,Diego,Medium,2005,3,1
10397,34,62.24,1,2116.16,2005-03-28,Shipped,1,3,2005,Ships,54,S72_3212,Alpha Cognac,61.77.6555,...,Unknown,31000,France,EMEA,Roulet,Annette,Small,2005,3,28


In [76]:
# making N row header
read_file3 = pd.read_csv(r"C:\Users\HP\Downloads\sales_data.csv", header =2)

In [78]:
read_file3

Unnamed: 0,10121,34,81.35,5,2765.9,2003-05-07,Shipped,2,5.1,2003,...,Unknown,51100,France,EMEA,Henriot,Paul,Small,2003.1,5.2,7
0,10134,41,94.74,2,3884.34,2003-07-01,Shipped,3,7,2003,...,Unknown,75508,France,EMEA,Da Cunha,Daniel,Medium,2003,7,1
1,10145,45,83.26,6,3746.70,2003-08-25,Shipped,3,8,2003,...,CA,90003,USA,Unknown,Young,Julie,Medium,2003,8,25
2,10159,49,100.00,14,5205.27,2003-10-10,Shipped,4,10,2003,...,CA,00000,USA,Unknown,Brown,Julie,Medium,2003,10,10
3,10168,36,96.66,1,3479.76,2003-10-28,Shipped,4,10,2003,...,CA,94217,USA,Unknown,Hirano,Juri,Medium,2003,10,28
4,10180,29,86.13,9,2497.77,2003-11-11,Shipped,4,11,2003,...,Unknown,59000,France,EMEA,Rance,Martine,Small,2003,11,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2816,10350,20,100.00,15,2244.40,2004-12-02,Shipped,4,12,2004,...,Unknown,28034,Spain,EMEA,Freyre,Diego,Small,2004,12,2
2817,10373,29,100.00,1,3978.51,2005-01-31,Shipped,1,1,2005,...,Unknown,90110,Finland,EMEA,Koskitalo,Pirkko,Medium,2005,1,31
2818,10386,43,100.00,4,5417.57,2005-03-01,Resolved,1,3,2005,...,Unknown,28034,Spain,EMEA,Freyre,Diego,Medium,2005,3,1
2819,10397,34,62.24,1,2116.16,2005-03-28,Shipped,1,3,2005,...,Unknown,31000,France,EMEA,Roulet,Annette,Small,2005,3,28


In [None]:
pandas.read_json(path_or_buf, *, orient=None, typ='frame',
                 dtype=None, convert_axes=None, convert_dates=True,
                 keep_default_dates=True, precise_float=False, date_unit=None,
                 encoding=None, encoding_errors='strict', lines=False, chunksize=None, 
                 compression='infer', nrows=None, storage_options=None, 
                 dtype_backend=<no_default>, engine='ujson')

In [None]:
pandas.read_sql(sql, con, index_col=None, 
                coerce_float=True, params=None, 
                parse_dates=None, columns=None, 
                chunksize=None,
                dtype_backend=<no_default>, dtype=None)

Working with Duplicate Label

In [87]:
# You can check whether an Index (storing the row or column labels) is unique with Index.is_unique:
load_data.index.is_unique

True

In [91]:
load_data.columns.is_unique

True

In [93]:
load_data.index.duplicated()

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False])

In [97]:
# Disallowing Duplicate Labels
pd.DataFrame([[0, 1, 2], [3, 4, 5]], columns=["A", "B", "C"],).set_flags(
    allows_duplicate_labels=False)


Unnamed: 0,A,B,C
0,0,1,2
1,3,4,5


In [101]:

 deduplicated = load_data.groupby(level=0).first()  # remove duplicates
deduplicated.flags.allows_duplicate_labels = False  # disallow going forward

In [103]:
load_data

Unnamed: 0,First Name,Last Name,Age,City,Country,Email,Phone,Salary
0,John,Doe,25.0,New York,USA,john.doe@example.com,123-456-7890,50000.0
1,Alice,Smith,30.0,Los Angeles,USA,alice.smith@example.com,987-654-3210,60000.0
2,Bob,Johnson,28.0,Chicago,USA,,555-666-7777,55000.0
3,John,Doe,25.0,New York,USA,john.doe@example.com,,50000.0
4,Charlie,Brown,,Houston,USA,charlie.b@example.com,222-333-4444,52000.0
5,Emily,Davis,35.0,Phoenix,USA,,111-222-3333,
6,Frank,Miller,40.0,,USA,frank.m@example.com,999-888-7777,70000.0
7,Grace,Wilson,27.0,San Diego,USA,grace.w@example.com,,62000.0
8,Hank,Taylor,,Dallas,USA,hank.t@example.com,777-888-9999,58000.0
9,Ivy,Martinez,29.0,San Jose,USA,,666-555-4444,61000.0
