# Reading dataset from GitHub using requests

In [1]:
# !pip install requests

In [2]:
import pandas as pd
import requests
from io import StringIO

Here, the `requests.get()` function sends an HTTP GET request to the specified URL. It returns a `Response` object, which is assigned to the variable `response`. This object contains the server's response to the request.

In [3]:
# Location of the CSV file on GitHub.
url = 'https://github.com/rashakil-ds/5-Minutes-to-Pandas/raw/main/bike.csv'
# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)
# Raise on a 4xx/5xx reply here, rather than passing an error page on to pd.read_csv.
response.raise_for_status()

The StringIO class from the io module is used to create a file-like object from the text content of the response. response.text contains the content of the CSV file as a string, and StringIO helps to treat this string as a file-like object that pd.read_csv can read.

In [4]:
# Wrap the downloaded text in an in-memory, file-like buffer for pd.read_csv.
csv_content = StringIO(response.text)

In [5]:
# Parse the in-memory CSV into a DataFrame, then preview its first rows.
df = pd.read_csv(csv_content)
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,1,1/1/2011,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,16
1,2,1/1/2011,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,40
2,3,1/1/2011,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,32
3,4,1/1/2011,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,13
4,5,1/1/2011,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,1


In [6]:
# Summarise the columns: names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17379 entries, 0 to 17378
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   instant     17379 non-null  int64  
 1   dteday      17379 non-null  object 
 2   season      17379 non-null  int64  
 3   yr          17379 non-null  int64  
 4   mnth        17379 non-null  int64  
 5   hr          17379 non-null  int64  
 6   holiday     17379 non-null  int64  
 7   weekday     17379 non-null  int64  
 8   workingday  17379 non-null  int64  
 9   weathersit  17379 non-null  int64  
 10  temp        17379 non-null  float64
 11  atemp       17379 non-null  float64
 12  hum         17379 non-null  float64
 13  windspeed   17379 non-null  float64
 14  cnt         17379 non-null  int64  
dtypes: float64(4), int64(10), object(1)
memory usage: 2.0+ MB


In [7]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(17379, 15)

# Reading dataset from GitHub using wget

In [8]:
#!pip install wget

In [9]:
# NOTE(review): this is the third-party `wget` Python package (see the
# commented-out `pip install wget` cell), not the command-line tool.
import wget

# Replace the URL with the raw URL of your CSV file on GitHub
url = 'https://raw.githubusercontent.com/rashakil-ds/5-Minutes-to-Pandas/main/bike.csv'

# Use wget to download the CSV file; the value it returns is kept in
# file_path and later handed to pd.read_csv as the file to read.
file_path = wget.download(url)

  0% [                                                                          ]       0 / 1032184  0% [                                                                          ]    8192 / 1032184  1% [.                                                                         ]   16384 / 1032184  2% [.                                                                         ]   24576 / 1032184  3% [..                                                                        ]   32768 / 1032184  3% [..                                                                        ]   40960 / 1032184  4% [...                                                                       ]   49152 / 1032184  5% [....                                                                      ]   57344 / 1032184  6% [....                                                                      ]   65536 / 1032184  7% [.....                                                                     ]   73728 / 1032184

In [10]:
# Use pd.read_csv to read the CSV data into a Pandas DataFrame
# Load the downloaded file (file_path) into a Pandas DataFrame.
df = pd.read_csv(file_path)

In [11]:
# Preview the first rows to confirm the downloaded file parsed as expected.
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,1,1/1/2011,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,16
1,2,1/1/2011,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,40
2,3,1/1/2011,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,32
3,4,1/1/2011,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,13
4,5,1/1/2011,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,1


In [12]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(17379, 15)

# Reading dataset from GitHub using direct raw data URL

In [13]:
# pd.read_csv is given the URL itself here, so no separate download step is needed.
url = 'https://raw.githubusercontent.com/rashakil-ds/5-Minutes-to-Pandas/main/bike.csv'
df = pd.read_csv(url)
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,1,1/1/2011,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,16
1,2,1/1/2011,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,40
2,3,1/1/2011,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,32
3,4,1/1/2011,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,13
4,5,1/1/2011,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,1


In [14]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(17379, 15)

# Reading Data From CSV/Excel

1. XLSX (Excel Workbook): This is the file format associated with Microsoft Excel — a ZIP-compressed package of XML files rather than plain text. It can store multiple sheets with formatting, formulas, charts, and images.
<br>
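2. CSV (Comma-Separated Values): This is a plain text file where values are separated by a delimiter — usually a comma, although other characters such as a semicolon are also common (pass `sep=` to `pd.read_csv` in that case). It contains only the data without any formatting, formulas, or other elements.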

In [15]:
import pandas as pd
# No sheet_name is passed, so pandas falls back to its default sheet
# (the first sheet, per the read_excel documentation).
df = pd.read_excel('data1.xlsx')

In [16]:
# Display the frame that was just read from the workbook.
df

Unnamed: 0,age,income
0,20,40000
1,27,50000
2,25,52000
3,40,60000
4,50,75000
5,22,45000


In [18]:
import pandas as pd
# Select the worksheet explicitly by name instead of relying on the default.
df = pd.read_excel('data1.xlsx', sheet_name='Sheet1')
df

Unnamed: 0,age,income
0,20,40000
1,27,50000
2,25,52000
3,40,60000
4,50,75000
5,22,45000


In [19]:
#csv
df = pd.read_csv('data1.csv', sep=';')
df

Unnamed: 0,age,income
0,20,40000
1,27,50000
2,25,52000
3,40,60000
4,50,75000
5,22,45000


# Data in Different Folder

In [20]:
#csv
df = pd.read_csv("C:\\Local Disk F\\aiQuest Intelligence\\Data Science and Machine Learning\\Batch 18\\Class 10\mall customers.csv")
df

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40
...,...,...,...,...,...
195,196,Female,35,120,79
196,197,Female,45,126,28
197,198,Male,32,126,74
198,199,Male,32,137,18
