## Import Libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Kaggle exposes the input data files read-only under "../input/".
# Walking the input tree and printing every file path (run the cell with
# Shift+Enter) shows the exact location to pass to pd.read_csv.

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        
import seaborn as sns
import matplotlib.pyplot as plt

## Read the dataset

In [None]:
# Load the ramen ratings CSV from the input directory into the working dataframe.
df=pd.read_csv('../input/ramen-ratings/ramen-ratings.csv')

## Display the first 10 rows of the dataframe

In [None]:
df.head(10)

## Checking the number of missing values

In [None]:
df.isnull().sum()

In [None]:
# isna() and isnull() are aliases in pandas, so this gives the same per-column
# missing-value counts as the isnull() cell above.
df.isna().sum()

In [None]:
df.shape

In [None]:
df.dtypes

In [None]:
df['Top Ten'].unique()

## Unique Countries

In [None]:
df.Country.unique()

In [None]:
# Number of distinct country labels; dropna=False counts a missing value as a
# label too, exactly as len(unique()) does.
df.Country.nunique(dropna=False)

## Grouping the countries into their respective continents

In [None]:
continents=[]

In [None]:
# Continent -> country labels as they appear in the dataset (including the
# duplicate spellings 'USA'/'United States' and 'Netherlands'/'Holland').
continent_members = {
    'Asia': ['Japan', 'Taiwan', 'India', 'South Korea', 'Singapore', 'Thailand',
             'Hong Kong', 'Vietnam', 'Malaysia', 'Indonesia', 'China', 'Pakistan',
             'Bangladesh', 'Nepal', 'Myanmar', 'Cambodia', 'Sarawak', 'Philippines', 'Dubai'],
    'Africa': ['Ghana', 'Nigeria'],
    'Australia': ['Fiji', 'Australia'],
    'Europe': ['Germany', 'Hungary', 'UK', 'Netherlands', 'Finland', 'Sweden',
               'Estonia', 'Holland', 'Poland'],
    'North America': ['USA', 'Mexico', 'United States', 'Canada'],
    'South America': ['Brazil', 'Colombia'],
}

# Invert the table into a country -> continent lookup.
continent_by_country = {
    country: continent
    for continent, members in continent_members.items()
    for country in members
}

# Vectorised lookup: one continent per row, in row order.
mapped_continents = df['Country'].map(continent_by_country)

# A country missing from the table would otherwise be skipped silently and only
# surface later as a length mismatch when the column is assigned; fail here
# with the offending labels instead.
unmapped_countries = df.loc[mapped_continents.isna(), 'Country'].unique().tolist()
if unmapped_countries:
    raise ValueError(f"No continent defined for countries: {unmapped_countries}")

# Rebind (rather than append to) the list so re-running this cell cannot
# accumulate duplicate entries.
continents = mapped_continents.tolist()
    

In [None]:
df['Continent']=continents

In [None]:
df.head(10)

## Conversion of datatypes

In [None]:
df.dtypes

In [None]:
# Store the text columns as pandas categoricals, converting them one at a time
# in the same order as before.
for categorical_column in ('Brand', 'Variety', 'Style', 'Country', 'Continent'):
    df[categorical_column] = df[categorical_column].astype('category')

In [None]:
df.dtypes

In [None]:
# Convert the Stars column to a float dtype (downcast to the smallest float
# that fits). errors='coerce' turns any entry that cannot be parsed as a number
# into NaN instead of raising, so those rows show up as missing values later.
df['Stars']=pd.to_numeric(df['Stars'],downcast="float",errors='coerce')

In [None]:
df.dtypes

In [None]:
df[df['Country']=='United States']

In [None]:
# Merge the duplicate country label 'United States' into 'USA'.
# Only the Country column is touched: a frame-wide replace would also rewrite
# any other cell whose value happens to be exactly 'United States'.
# Country is categorical at this point, so the labels are merged on an
# object-dtype copy and the category dtype is restored afterwards.
country_labels=df['Country'].astype('object').replace('United States','USA')
df['Country']=country_labels.astype('category')

In [None]:
df.Country.unique()

## Handling Missing Values

In [None]:
df.Style.isna().sum()

In [None]:
df[df['Style'].isnull()]

In [None]:
df[df['Brand']=='Kamfen']

In [None]:
df[df['Brand']=='Kamfen']['Style'].mode()

In [None]:
df[df['Brand']=='Kamfen']['Style'].value_counts()

In [None]:
df[df['Brand']=='Unif']

In [None]:
df[df['Brand']=='Unif']['Style'].value_counts()

In [None]:
df.isna().sum()

In [None]:
df['Top Ten'].unique()

In [None]:
df['Top Ten']

In [None]:
# Rows without a Top Ten entry get 0. The result is assigned back to the
# column: calling fillna(..., inplace=True) on df['Top Ten'] acts on a
# temporary selection and leaves df unchanged under pandas Copy-on-Write,
# which would make the later dropna() discard every row without a Top Ten entry.
df['Top Ten']=df['Top Ten'].fillna(0)

In [None]:
df.isna().sum()

In [None]:
df['Stars'].unique()

In [None]:
# Remove, in place, every row that still has a missing value in any column
# (e.g. a missing Style, or a Stars value that was coerced to NaN).
# Re-running earlier cells after this point will not bring those rows back.
df.dropna(inplace=True)

In [None]:
df.isna().sum()

In [None]:
df.head()

In [None]:
# Write the cleaned dataframe to a CSV in the working directory, without the
# index column. Presumably this file feeds the spreadsheet visualisation
# linked below — confirm.
df.to_csv("RamenaRatingKaggle.csv",index=False)

## [Excel Visualization](https://docs.google.com/spreadsheets/d/1CWMAuRg7UA4Ml1PHduXcx6_K813XxdN9Jhk6ElFQ400/edit?usp=sharing)

In [None]:
df[df['Continent']=='Asia']

In [None]:
df.isna().sum()

## Continent Asia dataframe

In [None]:
d_Asia=df[df['Continent']=='Asia']

In [None]:
d_Asia.head()

In [None]:
top5BrandsAsia=d_Asia.groupby('Brand')['Stars'].mean().sort_values(ascending=False).head(5)

In [None]:
top5BrandsAsia

In [None]:
tA5=top5BrandsAsia.reset_index()

In [None]:
tA5list=tA5['Brand'].values.tolist()

In [None]:
tA5list

In [None]:
d_Asia[d_Asia['Brand'].isin(tA5list)]

## Europe Continent

In [None]:
d_EU=df[df['Continent']=='Europe']

In [None]:
top5BrandsEU=d_EU.groupby('Brand')['Stars'].mean().sort_values(ascending=False).head(5)

In [None]:
top5BrandsEU

In [None]:
tE5=top5BrandsEU.reset_index()

In [None]:
tE5list=tE5['Brand'].values.tolist()

In [None]:
tE5list

In [None]:
d_EU[d_EU['Brand'].isin(tE5list)].sort_values(by='Stars',ascending=False)

## Africa Continent

In [None]:
d_Africa=df[df['Continent']=='Africa']

In [None]:
d_Africa

In [None]:
top5BrandsAfrica=d_Africa.groupby('Brand')['Stars'].mean().sort_values(ascending=False).head(3)

In [None]:
top5BrandsAfrica

In [None]:
tAf5=top5BrandsAfrica.reset_index()

In [None]:
tAf5list=tAf5['Brand'].values.tolist()

In [None]:
tAf5list

In [None]:
d_Africa[d_Africa['Brand'].isin(tAf5list)].sort_values(by='Stars',ascending=False)

## Australia Continent 

In [None]:
d_Australia=df[df['Continent']=='Australia']

In [None]:
d_Australia

In [None]:
top5BrandsAustralia=d_Australia.groupby('Brand')['Stars'].mean().sort_values(ascending=False).head(5)

In [None]:
top5BrandsAustralia

In [None]:
tAust5=top5BrandsAustralia.reset_index()

In [None]:
tAust5

In [None]:
tAus5list=tAust5['Brand'].values.tolist()

In [None]:
tAus5list

In [None]:
d_Australia[d_Australia['Brand'].isin(tAus5list)].sort_values(by='Stars',ascending=False)

## North America Continent

In [None]:
d_NA=df[df['Continent']=='North America']

In [None]:
top5BrandsNA=d_NA.groupby('Brand')['Stars'].mean().sort_values(ascending=False).head(5)

In [None]:
tNA5=top5BrandsNA.reset_index()

In [None]:
tNA5list=tNA5['Brand'].values.tolist()

In [None]:
tNA5list

In [None]:
d_NA[d_NA['Brand'].isin(tNA5list)].sort_values(by='Stars',ascending=False)

The brand Yamachan is No. 1 in the USA.

The brand Plats Du Chef is popular in Canada.

In [None]:
d_NA[d_NA.Country=='Mexico']

In [None]:
d_NA.Country.unique()

## South America Continent

In [None]:
d_SA=df[df['Continent']=='South America']

In [None]:
top5BrandsSA=d_SA.groupby('Brand')['Stars'].mean().sort_values(ascending=False).head(5)

In [None]:
tSA5=top5BrandsSA.reset_index()

In [None]:
tSA5list=tSA5['Brand'].values.tolist()

In [None]:
tSA5list

In [None]:
SA_Popular=d_SA[d_SA['Brand'].isin(tSA5list)].sort_values(by='Stars',ascending=False)

In [None]:
SA_Popular

In [None]:
SA_Popular['Style'].value_counts()

In [None]:
d_SA.Country.unique()

# Conclusion:
* ### Ramen in the Pack style are the most popular, most reviewed and most easily available
* ### Continent wise :
    *     Asia : 'Prima Taste', 'Takamori', 'Tao Kae Noi', 'The Bridge', 'Nyor Nyar'
    *     Africa : 'Lele', 'Yum-Mie', 'Indomie'
    *     Australia : 'FMF', 'Maggi', 'Suimin', 'Fantastic', 'Singa-Me'
    *     Europe : 'Golden Wonder', 'Vifon', 'Kabuto Noodles', 'Nissin', 'Thai Chef'
    *     North America : 'Komforte Chockolates','Daifuku','Jackpot Teriyaki','Plats Du Chef','Yamachan'
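    *     South America : 'Nissin', '1 To 3 Noodles', '7 Select', '7 Select/Nissin', 'A-One' (caveat: the last four are the alphabetically first brand names in the dataset and are probably placeholder groups with no South American reviews rather than real top brands; re-check this list against the brands that actually appear in the South America rows)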


## Pandas Profiling

In [None]:
!pip install pandas-profiling

In [None]:
# NOTE(review): upstream has reportedly renamed pandas-profiling to
# ydata-profiling — confirm the package still installs and imports under
# this name.
import pandas_profiling as pp

# Build an automated profiling report of the dataframe; as the last expression
# of the cell it is rendered inline.
pp.ProfileReport(df)

## Pandas Visual Analysis

In [None]:
!pip install pandas_visual_analysis

Kaggle Notebook which mentioned Visual Analysis
https://www.kaggle.com/akshat0007/eda-in-just-one-line-of-code-on-ramen-data

In [None]:
from pandas_visual_analysis import VisualAnalysis

In [None]:
analysis=VisualAnalysis(df)

In [None]:
analysis