# *DATA SCIENCE / SECTION 1 / PROJECT*



# 🏆 프로젝트 목표


> 다음 분기 설계할 게임 분석

#  🕹️ Data Description

*   `Name` : 게임 이름
*   `Platform` : 게임이 지원되는 플랫폼의 이름
*   `Year` : 게임 출시 연도
*   `Genre` : 게임의 장르
*   `Publisher` : 게임을 제작한 회사
*   `NA_Sales` : 북미지역 출고량
*   `EU_Sales` : 유럽지역에서의 출고량
*   `JP_Sales` : 일본지역에서의 출고량
*   `Other_Sales` : 기타지역 출고량

#  데이터 분석




## **1. Gathering Data**

In [None]:
#라이브러리 불러오기

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the video game sales dataset (CSV hosted on S3) into a DataFrame
df = pd.read_csv(
    'https://ds-lecture-data.s3.ap-northeast-2.amazonaws.com/datasets/vgames2.csv'
)
df.head()  # preview the first rows

Unnamed: 0.1,Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales
0,1,Candace Kane's Candy Factory,DS,2008.0,Action,Destineer,0.04,0.0,0.0,0.0
1,2,The Munchables,Wii,2009.0,Action,Namco Bandai Games,0.17,0.0,0.0,0.01
2,3,Otome wa Oanesama Boku ni Koi Shiteru Portable,PSP,2010.0,Adventure,Alchemist,0.0,0.0,0.02,0.0
3,4,Deal or No Deal: Special Edition,DS,2010.0,Misc,Zoo Games,0.04,0.0,0.0,0.0
4,5,Ben 10 Ultimate Alien: Cosmic Destruction,PS3,2010.0,Platform,D3Publisher,0.12,0.09,0.0,0.04


## **2. EDA**

In [None]:
# Remove the unnecessary column: the leftover 'Unnamed: 0' row-number column
# that came in with the CSV.
# Dropping by name (rather than by position with iloc[:, 1:]) keeps this cell
# idempotent -- re-running it will not strip 'Name' or any other real column.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df.head()

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales
0,Candace Kane's Candy Factory,DS,2008.0,Action,Destineer,0.04,0.0,0.0,0.0
1,The Munchables,Wii,2009.0,Action,Namco Bandai Games,0.17,0.0,0.0,0.01
2,Otome wa Oanesama Boku ni Koi Shiteru Portable,PSP,2010.0,Adventure,Alchemist,0.0,0.0,0.02,0.0
3,Deal or No Deal: Special Edition,DS,2010.0,Misc,Zoo Games,0.04,0.0,0.0,0.0
4,Ben 10 Ultimate Alien: Cosmic Destruction,PS3,2010.0,Platform,D3Publisher,0.12,0.09,0.0,0.04


In [None]:
# Check for duplicates: rows whose (Name, Year) pair already appeared earlier
df.loc[df.duplicated(subset=['Name', 'Year'])]

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales
131,Call of Duty 4: Modern Warfare,PC,2007.0,Shooter,Activision,0,1.12,0,0.03
230,IL-2 Sturmovik: Birds of Prey,DS,2009.0,Simulation,505 Games,0.06,0,0,0
279,The Incredibles: Rise of the Underminer,XB,2005.0,Action,THQ,0.06,0.02,0,0
343,NBA 2K12,X360,2011.0,Sports,Take-Two Interactive,2.31,0.14,0.01,0.16
383,Madden NFL 07,GC,2006.0,Sports,Electronic Arts,0.48,0.13,0,0.02
...,...,...,...,...,...,...,...,...,...
16588,MindJack,PS3,2011.0,Shooter,Square Enix,0.07,0.08,0,0.03
16591,Madden NFL 2005,PS2,2004.0,Sports,Electronic Arts,4.18,0.26,0.01,80K
16593,Ice Age 2: The Meltdown,GC,2006.0,Platform,Vivendi Games,0.15,0.04,0,0.01
16595,NBA 2K16,PS3,2015.0,Sports,Take-Two Interactive,0.44,0.19,0.03,0.13


In [None]:
# Pull every row for one of the flagged titles (NBA 2K12)
condition = df['Name'] == 'NBA 2K12'
df.loc[condition]

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales
270,NBA 2K12,PSP,2011.0,Sports,Take-Two Interactive,0.22,0.03,0.0,0.03
343,NBA 2K12,X360,2011.0,Sports,Take-Two Interactive,2.31,0.14,0.01,0.16
6929,NBA 2K12,PS3,2011.0,Sports,Take-Two Interactive,1610K,0.27,0.05,0.18
9112,NBA 2K12,PC,2011.0,Sports,Take-Two Interactive,0.09,0.05,0.0,0.02
15533,NBA 2K12,Wii,2011.0,Sports,Take-Two Interactive,0.3,0.03,0.0,0.02


In [None]:
# Check missing values and data types.
# Year, Genre, and Publisher contain missing values; the sales columns and Year need a dtype change.

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16598 entries, 0 to 16597
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Name         16598 non-null  object 
 1   Platform     16598 non-null  object 
 2   Year         16327 non-null  float64
 3   Genre        16548 non-null  object 
 4   Publisher    16540 non-null  object 
 5   NA_Sales     16598 non-null  object 
 6   EU_Sales     16598 non-null  object 
 7   JP_Sales     16598 non-null  object 
 8   Other_Sales  16598 non-null  object 
dtypes: float64(1), object(8)
memory usage: 1.1+ MB


In [None]:
# Rows where Year is missing
df.loc[df['Year'].isna()]

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales
31,Rock Band,X360,,Misc,Electronic Arts,1.93,0.34,0,0.21
109,Yu-Gi-Oh! 5D's Wheelie Breakers (JP sales),Wii,,Racing,Unknown,0,0,0.02,0
273,Jewel Link Chronicles: Mountains of Madness,DS,,Puzzle,Avanquest,0,0.06,0,0.01
358,Majesty 2: The Fantasy Kingdom Sim,X360,,Simulation,Unknown,0.03,0,0,0
430,Famista 64,N64,,Sports,Namco Bandai Games,0,0,0.17,0.03
...,...,...,...,...,...,...,...,...,...
16240,Freaky Flyers,GC,,Racing,Unknown,0.01,0,0,0
16275,Space Invaders,2600,,Shooter,Atari,2.36,0.14,0M,0.03
16278,Madden NFL 11,Wii,,Sports,Unknown,0.7,0,0,50K
16425,Yoostar on MTV,X360,,Misc,Unknown,0.06,0,0,0


In [None]:
#

In [None]:
# Convert Year from float to integer.
# Year still contains missing values, and int(NaN) raises ValueError, so a
# plain apply(int) cannot work here. pandas' nullable 'Int64' dtype stores
# whole-number years as integers while keeping the missing entries as <NA>.
df['Year'] = df['Year'].astype('Int64')

ValueError: ignored