In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

### Data cleaning and preprocessing

In [3]:
# Load the restaurant listings; the relative path means the CSV must sit in the
# notebook's current working directory.
df = pd.read_csv('FastFoodRestaurants.csv')

In [4]:
# Preview the first rows to sanity-check that the file parsed as expected.
df.head()

Unnamed: 0,address,city,country,keys,latitude,longitude,name,postalCode,province,websites
0,324 Main St,Massena,US,us/ny/massena/324mainst/-1161002137,44.9213,-74.89021,McDonald's,13662,NY,"http://mcdonalds.com,http://www.mcdonalds.com/..."
1,530 Clinton Ave,Washington Court House,US,us/oh/washingtoncourthouse/530clintonave/-7914...,39.53255,-83.44526,Wendy's,43160,OH,http://www.wendys.com
2,408 Market Square Dr,Maysville,US,us/ky/maysville/408marketsquaredr/1051460804,38.62736,-83.79141,Frisch's Big Boy,41056,KY,"http://www.frischs.com,https://www.frischs.com..."
3,6098 State Highway 37,Massena,US,us/ny/massena/6098statehighway37/-1161002137,44.95008,-74.84553,McDonald's,13662,NY,"http://mcdonalds.com,http://www.mcdonalds.com/..."
4,139 Columbus Rd,Athens,US,us/oh/athens/139columbusrd/990890980,39.35155,-82.09728,OMG! Rotisserie,45701,OH,"http://www.omgrotisserie.com,http://omgrotisse..."


In [6]:
# List the column names available in the loaded data.
df.columns

Index(['address', 'city', 'country', 'keys', 'latitude', 'longitude', 'name',
       'postalCode', 'province', 'websites'],
      dtype='object')

In [7]:
# Summarise dtypes and non-null counts per column to spot missing data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   address     10000 non-null  object 
 1   city        10000 non-null  object 
 2   country     10000 non-null  object 
 3   keys        10000 non-null  object 
 4   latitude    10000 non-null  float64
 5   longitude   10000 non-null  float64
 6   name        10000 non-null  object 
 7   postalCode  10000 non-null  object 
 8   province    10000 non-null  object 
 9   websites    9535 non-null   object 
dtypes: float64(2), object(8)
memory usage: 781.4+ KB


In [8]:
# Count missing values per column.
df.isnull().sum()

address         0
city            0
country         0
keys            0
latitude        0
longitude       0
name            0
postalCode      0
province        0
websites      465
dtype: int64

In [9]:
# Drop every row that has a missing value in any column.
# Reassign instead of using inplace=True so the cell reads as an explicit
# transformation and does not silently mutate the frame shown by earlier cells.
# NOTE(review): at this point only `websites` contains nulls (465 rows), and
# `websites` is not among the columns kept later in the notebook, so these rows
# are removed solely because of a column the analysis never uses. Confirm this
# is intended; if not, select the needed columns before dropping.
df = df.dropna()

In [10]:
# Re-count missing values per column to verify the cleanup above.
df.isnull().sum()

address       0
city          0
country       0
keys          0
latitude      0
longitude     0
name          0
postalCode    0
province      0
websites      0
dtype: int64

In [13]:
# Inspect five randomly chosen rows (not just the head) as a spot check.
# A fixed random_state makes the same rows appear on every Restart & Run All;
# the value 42 is arbitrary.
df.sample(5, random_state=42)

Unnamed: 0,address,city,country,keys,latitude,longitude,name,postalCode,province,websites
961,7337 Wooster Pike,Cincinnati,US,us/oh/cincinnati/7337woosterpike/-1161002137,39.142877,-84.362845,McDonald's,45227,OH,"http://mcdonalds.com,http://www.mcdonalds.com/..."
6404,1701 E Markland Ave,Kokomo,US,us/in/kokomo/1701emarklandave/93075755,40.47659,-86.11075,Arby's,46901,IN,https://locations.arbys.com/us/in/kokomo/1701-...
4137,1500 N US Highway 285,Fort Stockton,US,us/tx/fortstockton/1500nushighway285/-1161002137,30.89826,-102.90458,McDonald's,79735,TX,"http://mcdonalds.com,http://www.mcdonalds.com/..."
7209,2207 W Oak St,Palestine,US,us/tx/palestine/2207woakst/-230519289,31.745545,-95.655101,Jack in the Box,75801,TX,"http://www.jackinthebox.com,https://www.jackin..."
5369,2751 Queensgate Dr,Richland,US,us/wa/richland/2751queensgatedr/-1161002137,46.25908,-119.3105,McDonald's,99352,WA,"http://mcdonalds.com,http://www.mcdonalds.com/..."


In [14]:
# Narrow the frame to the five text columns used from here on.
kept_columns = ["address", "keys", "city", "country", "name"]
df = df[kept_columns]

In [15]:
# Preview the frame again to confirm only the selected columns remain.
df.head()

Unnamed: 0,address,keys,city,country,name
0,324 Main St,us/ny/massena/324mainst/-1161002137,Massena,US,McDonald's
1,530 Clinton Ave,us/oh/washingtoncourthouse/530clintonave/-7914...,Washington Court House,US,Wendy's
2,408 Market Square Dr,us/ky/maysville/408marketsquaredr/1051460804,Maysville,US,Frisch's Big Boy
3,6098 State Highway 37,us/ny/massena/6098statehighway37/-1161002137,Massena,US,McDonald's
4,139 Columbus Rd,us/oh/athens/139columbusrd/990890980,Athens,US,OMG! Rotisserie


In [16]:
import seaborn as sns
import matplotlib.pyplot as plt


### Data Visualization (Exploratory Data Analysis)

In [17]:
# Check row count, dtypes and non-null counts of the reduced frame before plotting.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9535 entries, 0 to 9999
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   address  9535 non-null   object
 1   keys     9535 non-null   object
 2   city     9535 non-null   object
 3   country  9535 non-null   object
 4   name     9535 non-null   object
dtypes: object(5)
memory usage: 447.0+ KB


In [18]:
# All remaining columns are object dtype, so describe() reports
# count / unique / top / freq rather than numeric statistics.
df.describe()

Unnamed: 0,address,keys,city,country,name
count,9535,9535,9535,9535,9535
unique,9472,9535,2697,1,433
top,3900 Las Vegas Blvd S,us/ny/massena/324mainst/-1161002137,Cincinnati,US,McDonald's
freq,3,1,116,9535,1865


In [None]:
# Horizontal bar chart of the ten restaurant names that occur most often in df.
# value_counts() sorts by frequency in descending order, so the first ten index
# entries are the ten most common names; passing them as `order` both restricts
# the plot to those names and fixes the bar order.
top_names = df['name'].value_counts().index[:10]

# Explicit figure/axes instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 12))
sns.countplot(data=df, y='name', order=top_names, ax=ax)

# The figure should stand alone when the notebook is skimmed: full title + axis labels.
ax.set_title('Top 10 Fast Food Restaurants by Number of Locations')
ax.set_xlabel('Number of locations')
ax.set_ylabel('Restaurant')

# Render the figure without echoing the repr of the last expression.
plt.show()