In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Working with csv and json file
You can use most of the csv commands to json also

#### ✅ Method 1: If you have data

In [3]:
# df = pd.read_csv("code.csv")

#### ✅ Method 2: If you have url of csv raw file (like githbub)

In [None]:
# import requests
# from io import StringIO

# url = "put your url here"
# headers = {
#     "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0"
# }

# req = requests.get(url, headers=headers)
# data = StringIO(req.text)

# import pandas as pd
# df = pd.read_csv(data)


#### ✅ Method 3: sep parameter, dealing with .tsv file

In [None]:
# df = pd.read_csv('hello.csv', sep='\t')

#### ✅ Method 4: If you do not have features and first row is being used as feature

In [4]:
# df = pd.read_csv("hello.csv", names=["col1", "col2", "col3"])

#### ✅ Method 5: Make any column as an index

In [6]:
# df = pd.read_csv("hello.csv", index_col="customer_id")

#### ✅ Method 6: If your feature name is in the row 

In [8]:
# pd.read_csv("hello.csv", header=1)

#### ✅ Method 7: If you want to take only selected col from the beginning

In [9]:
# pd.read_csv("hello.csv", usecols=["col1", "col2", "col3"])

#### ✅ Method 8: To get only n-number of rows

In [10]:
# pd.read_csv("hello.csv", nrows=100)

#### ✅ Method 9: If your data is not loading and showing some encoding error, then search for their encoding and mention it in the code or edit it in some encoding editor

In [None]:
# this is random encoding for example, you can change it to "utf-8", "latin1", etc.
# pd.read_csv("hello.csv", encoding="latin1")

#### ✅ Method 10: If you get Parser error

In [13]:
# pd.read_csv("hello.csv", error_bad_lines=False)

#### ✅ Method 11: If your target column is in float 0.0 and 1.0 and you want to change it into int

In [14]:
# pd.read_csv("hello.csv", dtype={"target":int})

#### ✅ Method 12: If you get date in string format 

In [15]:
# pd.read_csv("hello.csv", parse_dates=['date'])

#### ✅ Method 13: If you want to convert let team name like Royal Challengers Benguluru, Chennai Super Kings to RCB and CSK in any column

In [19]:
df = pd.read_csv("IPL Matches 2008-2020.csv")
df.head()

Unnamed: 0,id,city,date,player_of_match,venue,neutral_venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,eliminator,method,umpire1,umpire2
0,335982,Bangalore,2008-04-18,BB McCullum,M Chinnaswamy Stadium,0,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,field,Kolkata Knight Riders,runs,140.0,N,,Asad Rauf,RE Koertzen
1,335983,Chandigarh,2008-04-19,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",0,Kings XI Punjab,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,runs,33.0,N,,MR Benson,SL Shastri
2,335984,Delhi,2008-04-19,MF Maharoof,Feroz Shah Kotla,0,Delhi Daredevils,Rajasthan Royals,Rajasthan Royals,bat,Delhi Daredevils,wickets,9.0,N,,Aleem Dar,GA Pratapkumar
3,335985,Mumbai,2008-04-20,MV Boucher,Wankhede Stadium,0,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,Royal Challengers Bangalore,wickets,5.0,N,,SJ Davis,DJ Harper
4,335986,Kolkata,2008-04-20,DJ Hussey,Eden Gardens,0,Kolkata Knight Riders,Deccan Chargers,Deccan Chargers,bat,Kolkata Knight Riders,wickets,5.0,N,,BF Bowden,K Hariharan


In [16]:
def replace_team_names(team_name):
    team_name = team_name.lower()
    if "royal challengers" in team_name:
        return "RCB"
    elif "chennai super kings" in team_name:
        return "CSK"
    elif "mumbai indians" in team_name:
        return "MI"
    elif "delhi capitals" in team_name:
        return "DC"
    elif "kolkata knight riders" in team_name:
        return "KKR"
    elif "sunrisers hyderabad" in team_name:
        return "SRH"
    else:
        return team_name

In [21]:
df_new = pd.read_csv("IPL Matches 2008-2020.csv", converters={
    "team1": replace_team_names,"team2": replace_team_names
})

In [23]:
df_new.head()

Unnamed: 0,id,city,date,player_of_match,venue,neutral_venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,eliminator,method,umpire1,umpire2
0,335982,Bangalore,2008-04-18,BB McCullum,M Chinnaswamy Stadium,0,RCB,KKR,Royal Challengers Bangalore,field,Kolkata Knight Riders,runs,140.0,N,,Asad Rauf,RE Koertzen
1,335983,Chandigarh,2008-04-19,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",0,kings xi punjab,CSK,Chennai Super Kings,bat,Chennai Super Kings,runs,33.0,N,,MR Benson,SL Shastri
2,335984,Delhi,2008-04-19,MF Maharoof,Feroz Shah Kotla,0,delhi daredevils,rajasthan royals,Rajasthan Royals,bat,Delhi Daredevils,wickets,9.0,N,,Aleem Dar,GA Pratapkumar
3,335985,Mumbai,2008-04-20,MV Boucher,Wankhede Stadium,0,MI,RCB,Mumbai Indians,bat,Royal Challengers Bangalore,wickets,5.0,N,,SJ Davis,DJ Harper
4,335986,Kolkata,2008-04-20,DJ Hussey,Eden Gardens,0,KKR,deccan chargers,Deccan Chargers,bat,Kolkata Knight Riders,wickets,5.0,N,,BF Bowden,K Hariharan


#### ✅ Method 14: If you get some bad values like $ % in the data, maybe in gender column

In [24]:
# pd.read_csv("hello.csv", na_values=["$","%"])

#### ✅ Method 15: If the data is big and you want to divide it into chunks

In [31]:
df = pd.read_csv("IPL Matches 2008-2020.csv", chunksize=10)

In [32]:
for chunks in df:
    print(chunk.shape)

(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)
(16, 17)


### For JSON files

In [33]:
# pd.read_json("hello.json")

In [34]:
# if data is in url
# pd.read_json("https://example.com/data.json")

You can use most of the .csv methods in JSON

#