# 더로그 크롤링 코드

사이트 링크
- https://www.thelog.co.kr/index.do

사이트 특징
- 로그인 한 후 데이터 열람 가능
- html 방식으로 데이터를 전송(x)
- 유저가 특정 데이터 요청 -> 데이터 베이스에서 json 형태로 export -> 유저 화면에 보여짐

In [22]:
import requests
import json 
from tqdm import tqdm
import pandas as pd
from datetime import datetime, timedelta

In [23]:
#날짜 리스트 만들기
def date_range(start, end):
    start = datetime.strptime(start, "%Y%m%d")
    end = datetime.strptime(end, "%Y%m%d")
    dates = [(start + timedelta(days=i)).strftime("%Y%m%d") for i in range((end-start).days+1)]
    return dates

dates = date_range("20160531", "20161231")
dates.extend(date_range("20170101", "20171231")) 
dates.extend(date_range("20180101", "20181231"))
dates.extend(date_range("20190101", "20191231"))
dates.extend(date_range("20200101", "20201231"))
dates.extend(date_range("20210101", "20211031"))

In [24]:
#url 리스트 만들기 
url = "https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate={}&gameDataType=A&moreBtnOption=false"
url_lists = []

for date in dates:
    url_lists.append(url.format(date)) 

In [25]:
url_lists

['https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate=20160531&gameDataType=A&moreBtnOption=false',
 'https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate=20160601&gameDataType=A&moreBtnOption=false',
 'https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate=20160602&gameDataType=A&moreBtnOption=false',
 'https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate=20160603&gameDataType=A&moreBtnOption=false',
 'https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate=20160604&gameDataType=A&moreBtnOption=false',
 'https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate=20160605&gameDataType=A&moreBtnOption=false',
 'https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate=20160606&gameDataType=A&moreBtnOption=false',
 'https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate=20160607&gameDataType=A&moreBtnOption=false',
 'https://www.thelog.co.kr/api/service/gameRank.do?page=1&targetDate=20160608&ga

In [26]:
#데이터 프레임 생성
result = {
    "gameCode": [],
    "gameName": [],
    "gameRank": [],
    "gameRankUpDown": [],
    "gameShares": [],
    "gameTypeShares": [],
    "sharesUpDown": [],
    "timeCountTotal": [],
    "avgUsedTime": [],
    "userCountTotal": [],
    "useStoreCount": [],
    "gameRegDate": [],
    "publisher":[],
    "payType": [],
    "gameTypeName":[],
    'date':[]
}

In [27]:
#크롤링

#로그인 세팅
login_url = 'https://www.thelog.co.kr/login/loginProc.do'
session = requests.session()

params = dict()
params["loginId"]="oper00"
params["loginPasswd"]="Mktg202!"

res = session.post(login_url,data=params)
res.raise_for_status()

#데이터 가져오기 
for url_list in (url_lists):
        res = session.get(url_list)
        if res.status_code == requests.codes.ok:    
            game_data = json.loads(res.text) 
            for rank in game_data['gameRanks']:
                result["gameCode"].append(rank["gameCode"])
                result['gameName'].append(rank['gameName'])
                result["gameRank"].append(rank["gameRank"])
                result["gameRankUpDown"].append(rank["gameRankUpDown"])
                result["gameShares"].append(rank["gameShares"])
                result["gameTypeShares"].append(rank["gameTypeShares"])
                result["sharesUpDown"].append(rank["sharesUpDown"])
                result["timeCountTotal"].append(rank["timeCountTotal"])
                result["avgUsedTime"].append(rank["avgUsedTime"])
                result["userCountTotal"].append(rank["userCountTotal"])
                result["useStoreCount"].append(rank["useStoreCount"])
                result["gameRegDate"].append(rank["gameRegDate"])
                result["publisher"].append(rank["publisher"])
                result["payType"].append(rank["payType"])
                result["gameTypeName"].append(rank["gameTypeName"])
                result['date'].append(url_list)

        else:
            print("Error code")

In [28]:
#데이터 프레임 변환 후 export
df = pd.DataFrame(result)
df.head()
df.to_csv('theLog-scrap.csv')

# 더로그 지역별 데이터 크롤링

In [55]:
#날짜 리스트 만들기
from datetime import datetime, timedelta

def date_range(start, end):
    start = datetime.strptime(start, "%Y.%m.%d")
    end = datetime.strptime(end, "%Y.%m.%d")
    dates = [(start + timedelta(days=i)).strftime("%Y.%m.%d") for i in range((end-start).days+1)]
    return dates

In [56]:
dates = date_range("2016.05.31", "2016.12.31")
dates.extend(date_range("2017.01.01", "2017.12.31")) 
dates.extend(date_range("2018.01.01", "2018.12.31"))
dates.extend(date_range("2019.01.01", "2019.12.31"))
dates.extend(date_range("2020.01.01", "2020.12.31"))
dates.extend(date_range("2021.01.01", "2021.09.13"))

In [57]:
pages = list(range(1,19))
pages

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]

In [58]:
#url 리스트 만들기 
url = "https://www.thelog.co.kr/api/service/getPressAreaTotalRank.do?menuType=1&gameType=&timeCode=&stime={}&etime={}&page={}"
url_lists = []

for date in dates:
    n1 = date
    for page in pages:
        n2 = page
        url_lists.append(url.format(n1,n1,n2))

In [59]:
len(url_lists)

34776

In [60]:
url_lists

['https://www.thelog.co.kr/api/service/getPressAreaTotalRank.do?menuType=1&gameType=&timeCode=&stime=2016.05.31&etime=2016.05.31&page=1',
 'https://www.thelog.co.kr/api/service/getPressAreaTotalRank.do?menuType=1&gameType=&timeCode=&stime=2016.05.31&etime=2016.05.31&page=2',
 'https://www.thelog.co.kr/api/service/getPressAreaTotalRank.do?menuType=1&gameType=&timeCode=&stime=2016.05.31&etime=2016.05.31&page=3',
 'https://www.thelog.co.kr/api/service/getPressAreaTotalRank.do?menuType=1&gameType=&timeCode=&stime=2016.05.31&etime=2016.05.31&page=4',
 'https://www.thelog.co.kr/api/service/getPressAreaTotalRank.do?menuType=1&gameType=&timeCode=&stime=2016.05.31&etime=2016.05.31&page=5',
 'https://www.thelog.co.kr/api/service/getPressAreaTotalRank.do?menuType=1&gameType=&timeCode=&stime=2016.05.31&etime=2016.05.31&page=6',
 'https://www.thelog.co.kr/api/service/getPressAreaTotalRank.do?menuType=1&gameType=&timeCode=&stime=2016.05.31&etime=2016.05.31&page=7',
 'https://www.thelog.co.kr/api/ser

In [61]:
#데이터 프레임 생성
result = {
    "gameCode": [],
    "Total": [],
    "Seoul": [],
    "Gyeonggi": [],
    "Incheon": [],
    "Gangwon": [],
    "Gyeongnam": [],
    "Gyeongbuk": [],
    "Gwangju": [],
    "Daegu": [],
    "Daejeon": [],
    "Busan": [],
    "Ulsan":[],
    "Jeonnam": [],
    "Jeonbuk":[],
    "Jeju":[],
    "Chungnam":[],
    "Chungbuk":[],
    
    "TotalUserCount":[],
    "SeoulUserCount":[],
    "GyeonggiUserCount":[],
    "IncheonUserCount":[],
    "GangwonUserCount":[],
    "GyeongnamUserCount":[],
    "GyeongbukUserCount":[],
    "GwangjuUserCount":[],
    "DaeguUserCount":[],
    "DaejeonUserCount":[],
    "BusanUserCount":[],
    "UlsanUserCount":[],
    "JeonnamUserCount":[],
    "JeonbukUserCount":[],
    "JejuUserCount":[],
    "ChungnamUserCount":[],
    "ChungbukUserCount":[],
    
    
    "gameName":[],
    "publisher":[],
    "gameTypeName":[],
    "gameRank":[],
    'date':[]
    
}

In [62]:
from tqdm import notebook

#크롤링
import requests
import json 

#로그인 세팅
login_url = 'https://www.thelog.co.kr/login/loginProc.do'
session = requests.session()

params = dict()
params["loginId"]="oper00"
params["loginPasswd"]="Mktg202!"

res = session.post(login_url,data=params)
res.raise_for_status()

#데이터 가져오기 
for url_list in tqdm(url_lists):
        res = session.get(url_list)
        if res.status_code == requests.codes.ok:    
            game_data = json.loads(res.text) 
            for rank in game_data['myGameRanks']:
                result["gameCode"].append(rank["gameCode"])
                result["Total"].append(rank["areaTotal"])
                result['Seoul'].append(rank["areaSeoul"])
                result['Gyeonggi'].append(rank["areaGyeonggi"])
                result['Incheon'].append(rank["areaIncheon"])
                result['Gangwon'].append(rank["areaGangwon"])
                result['Gyeongnam'].append(rank["areaGyeongnam"])
                result['Gyeongbuk'].append(rank["areaGyeongbuk"])
                result['Gwangju'].append(rank["areaGwangju"])
                result['Daegu'].append(rank["areaDaegu"])
                result['Daejeon'].append(rank["areaDaejeon"])
                result['Busan'].append(rank["areaBusan"])
                result['Ulsan'].append(rank["areaUlsan"])
                result['Jeonnam'].append(rank["areaJeonnam"])
                result['Jeonbuk'].append(rank["areaJeonbuk"])
                result['Jeju'].append(rank["areaJeju"])
                result['Chungnam'].append(rank["areaChungnam"])
                result['Chungbuk'].append(rank["areaChungbuk"])
                
                result['TotalUserCount'].append(rank["areaTotalUserCount"])
                result['SeoulUserCount'].append(rank["areaSeoulUserCount"])
                result['GyeonggiUserCount'].append(rank["areaGyeonggiUserCount"])
                result['IncheonUserCount'].append(rank["areaIncheonUserCount"])
                result['GangwonUserCount'].append(rank["areaGangwonUserCount"])
                result['GyeongnamUserCount'].append(rank["areaGyeongnamUserCount"])
                result['GyeongbukUserCount'].append(rank["areaGyeongbukUserCount"])
                result['GwangjuUserCount'].append(rank["areaGwangjuUserCount"])
                result['DaeguUserCount'].append(rank["areaDaeguUserCount"])
                result['DaejeonUserCount'].append(rank["areaDaejeonUserCount"])
                result['BusanUserCount'].append(rank["areaBusanUserCount"])
                result['UlsanUserCount'].append(rank["areaUlsanUserCount"])
                result['JeonnamUserCount'].append(rank["areaJeonnamUserCount"])
                result['JeonbukUserCount'].append(rank["areaJeonbukUserCount"])
                result['JejuUserCount'].append(rank["areaJejuUserCount"])
                result['ChungnamUserCount'].append(rank["areaChungnamUserCount"])
                result['ChungbukUserCount'].append(rank["areaChungbukUserCount"])
                
                result['gameName'].append(rank["gameName"])
                result['publisher'].append(rank["publisher"])
                result['gameTypeName'].append(rank['gameTypeName'])
                result['gameRank'].append(rank['gameRank'])
                result['date'].append(url_list)
                
                
        else:
            print("Error code")

  5%|███▉                                                                         | 1786/34776 [00:49<15:12, 36.14it/s]


KeyboardInterrupt: 

In [63]:
#로그인 세팅
login_url = 'https://www.thelog.co.kr/login/loginProc.do'
session = requests.session()

params = dict()
params["loginId"]="oper00"
params["loginPasswd"]="Mktg202!"

res = session.post(login_url,data=params)
res.raise_for_status()

#데이터 가져오기 
for url_list in tqdm(url_lists):
        res = session.get(url_list)
        if res.status_code == requests.codes.ok:    
            game_data = json.loads(res.text) 
            for rank in game_data['myGameRanks']:
                print(rank)

  3%|██▍                                                                          | 1119/34776 [00:27<14:02, 39.97it/s]


KeyboardInterrupt: 

In [51]:
#데이터 프레임 변환 후 export
import pandas as pd
df = pd.DataFrame(result)
df.head()
df.to_csv('theLog-scrap2.csv')

In [53]:
df

Unnamed: 0,gameCode,Total,Seoul,Gyeonggi,Incheon,Gangwon,Gyeongnam,Gyeongbuk,Gwangju,Daegu,...,JeonnamUserCount,JeonbukUserCount,JejuUserCount,ChungnamUserCount,ChungbukUserCount,gameName,publisher,gameTypeName,gameRank,date
0,707,7420619383,1541939097,1749858809,403400136,144342535,427189269,360501434,254024943,413247055,...,36167,55799,18355,41714,44246,오버워치,블리자드,FPS,1,https://www.thelog.co.kr/api/service/getPressA...
1,545,4987453669,1072012682,1142036060,286685571,94005928,273608072,202711119,198142712,281893515,...,26361,36417,12331,27808,26452,리그 오브 레전드,라이엇게임즈,RTS,2,https://www.thelog.co.kr/api/service/getPressA...
2,588,1174687543,217744506,267402209,73386183,21677660,80187389,66752487,42686667,65286417,...,10431,15542,5415,10428,9711,피파온라인3,넥슨,스포츠,3,https://www.thelog.co.kr/api/service/getPressA...
3,1,1167681515,202121684,269940844,74533997,22390735,82212147,56953794,48851293,62167590,...,11676,13213,3864,12310,10199,서든어택,넥슨,FPS,4,https://www.thelog.co.kr/api/service/getPressA...
4,12,1007996290,227886035,247164810,62550692,15917423,49772359,43610322,40417488,45330771,...,4917,6524,2201,5682,4888,메이플스토리,넥슨,RPG,5,https://www.thelog.co.kr/api/service/getPressA...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
506895,834,1522,0,0,0,0,0,0,0,1522,...,0,0,0,0,0,HITMAN 2,스팀,액션,310,https://www.thelog.co.kr/api/service/getPressA...
506896,192,1445,0,462,0,0,0,0,0,0,...,0,0,0,0,0,아레스 온라인,엠게임,RPG,311,https://www.thelog.co.kr/api/service/getPressA...
506897,800,1342,1342,0,0,0,0,0,0,0,...,0,0,0,0,0,Far Cry 5,스팀,FPS,312,https://www.thelog.co.kr/api/service/getPressA...
506898,926,549,0,0,549,0,0,0,0,0,...,0,0,0,0,0,섀도우 아레나,(주)펄어비스,액션,313,https://www.thelog.co.kr/api/service/getPressA...


# 더로그 시간별 데이터 크롤링

In [1]:
#날짜 리스트 만들기
from datetime import datetime, timedelta

def date_range(start, end):
    start = datetime.strptime(start, "%Y.%m.%d")
    end = datetime.strptime(end, "%Y.%m.%d")
    dates = [(start + timedelta(days=i)).strftime("%Y.%m.%d") for i in range((end-start).days+1)]
    return dates

In [2]:
dates = date_range("2016.05.31", "2016.12.31")
dates.extend(date_range("2017.01.01", "2017.12.31")) 
dates.extend(date_range("2018.01.01", "2018.12.31"))
dates.extend(date_range("2019.01.01", "2019.12.31"))
dates.extend(date_range("2020.01.01", "2020.12.31"))
dates.extend(date_range("2021.01.01", "2021.11.17"))

In [3]:
sidos = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R']
sidos 

['A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R']

In [4]:
pages = list(range(1,19))
pages

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]

In [5]:
#url 리스트 만들기 
url = "https://www.thelog.co.kr/api/service/getPressTotalRank.do?menuType=0&gameType=&sidoCode={}&stime={}&etime={}&page={}&gameDataType=A"
url_lists3 = []

for date in dates:
    n1 = date
    for sido in sidos:
        n2 = sido
        for page in pages:
            n3 = page
            url_lists3.append(url.format(n2,n1,n1,n3))
        

In [6]:
url_lists3

['https://www.thelog.co.kr/api/service/getPressTotalRank.do?menuType=0&gameType=&sidoCode=A&stime=2016.05.31&etime=2016.05.31&page=1&gameDataType=A',
 'https://www.thelog.co.kr/api/service/getPressTotalRank.do?menuType=0&gameType=&sidoCode=A&stime=2016.05.31&etime=2016.05.31&page=2&gameDataType=A',
 'https://www.thelog.co.kr/api/service/getPressTotalRank.do?menuType=0&gameType=&sidoCode=A&stime=2016.05.31&etime=2016.05.31&page=3&gameDataType=A',
 'https://www.thelog.co.kr/api/service/getPressTotalRank.do?menuType=0&gameType=&sidoCode=A&stime=2016.05.31&etime=2016.05.31&page=4&gameDataType=A',
 'https://www.thelog.co.kr/api/service/getPressTotalRank.do?menuType=0&gameType=&sidoCode=A&stime=2016.05.31&etime=2016.05.31&page=5&gameDataType=A',
 'https://www.thelog.co.kr/api/service/getPressTotalRank.do?menuType=0&gameType=&sidoCode=A&stime=2016.05.31&etime=2016.05.31&page=6&gameDataType=A',
 'https://www.thelog.co.kr/api/service/getPressTotalRank.do?menuType=0&gameType=&sidoCode=A&stime=20

In [7]:
len(url_lists3)

647028

In [8]:
from tqdm import tqdm
import pandas as pd

#크롤링
import requests
import json 

#로그인 세팅
login_url = 'https://www.thelog.co.kr/login/loginProc.do'
session = requests.session()

params = dict()
params["loginId"]="oper00"
params["loginPasswd"]="Mktg202!"

res = session.post(login_url,data=params)
res.raise_for_status()

list_of_df =[]

#데이터 가져오기 
for url_list in tqdm(url_lists3):
        res = session.get(url_list)
        gamedata = json.loads(res.text)
        
        try:     
            df = pd.json_normalize(gamedata['myGameRanks'])
            list_of_df.append(df)
            
                
        except:
            pass

 49%|██████████████████████████████████▌                                   | 319726/647028 [3:02:23<3:06:42, 29.22it/s]


ConnectionError: ('Connection aborted.', ConnectionAbortedError(10053, '현재 연결은 사용자의 호스트 시스템의 소프트웨어의 의해 중단되었습니다', None, 10053, None))

In [9]:
len(list_of_df)

301960

In [10]:
url_lists4 = url_lists3[301960:]

In [11]:
from tqdm import tqdm
import pandas as pd

#크롤링
import requests
import json 

#로그인 세팅
login_url = 'https://www.thelog.co.kr/login/loginProc.do'
session = requests.session()

params = dict()
params["loginId"]="oper00"
params["loginPasswd"]="Mktg202!"

res = session.post(login_url,data=params)
res.raise_for_status()


#데이터 가져오기 
for url_list in tqdm(url_lists4):
        res = session.get(url_list)
        gamedata = json.loads(res.text)
        
        try:     
            df = pd.json_normalize(gamedata['myGameRanks'])
            list_of_df.append(df)
            
                
        except:
            pass

 82%|███████████████████████████████████████████████████████████             | 283138/345068 [2:40:11<35:02, 29.46it/s]


ConnectionError: ('Connection aborted.', ConnectionResetError(10054, '현재 연결은 원격 호스트에 의해 강제로 끊겼습니다', None, 10054, None))

In [12]:
len(list_of_df)

569366

In [15]:
url_lists5 = url_lists3[569366:]

In [16]:
from tqdm import tqdm
import pandas as pd

#크롤링
import requests
import json 

#로그인 세팅
login_url = 'https://www.thelog.co.kr/login/loginProc.do'
session = requests.session()

params = dict()
params["loginId"]="oper00"
params["loginPasswd"]="Mktg202!"

res = session.post(login_url,data=params)
res.raise_for_status()


#데이터 가져오기 
for url_list in tqdm(url_lists5):
        res = session.get(url_list)
        gamedata = json.loads(res.text)
        
        try:     
            df = pd.json_normalize(gamedata['myGameRanks'])
            list_of_df.append(df)
            
                
        except:
            pass

100%|████████████████████████████████████████████████████████████████████████████| 77662/77662 [48:38<00:00, 26.61it/s]


In [17]:
df_accum = pd.concat(list_of_df[0:100000], axis=0) #데이터 프레임으로 변환

In [18]:
df_accum2 = pd.concat(list_of_df[100000:200000], axis=0)

In [19]:
df_accum3 = pd.concat(list_of_df[200000:300000], axis=0) # 커넬이 자꾸 죽어서 나눠서 데이터 프레임화
df_accum4 = pd.concat(list_of_df[300000:400000], axis=0)

In [20]:
df_accum

Unnamed: 0,gtrSeq,gameCode,gameName,gameRank,gameRankUpDown,gameShares,gameTypeShares,sharesUpDown,timeCountTotal,preTimeShares,...,sort,stime,etime,sidoCode,sWeek,eWeek,sMonth,eMonth,gameTotalRanks,gameDataType
0,0,545,리그 오브 레전드,1,0,0.0,0.0,0.0,956262,0.0,...,0,,,,,,,,,
1,0,707,오버워치,2,0,0.0,0.0,0.0,266894,0.0,...,0,,,,,,,,,
2,0,1,서든어택,3,0,0.0,0.0,0.0,169841,0.0,...,0,,,,,,,,,
3,0,8,리니지,4,0,0.0,0.0,0.0,106744,0.0,...,0,,,,,,,,,
4,0,4,워크래프트3 리포지드,5,0,0.0,0.0,0.0,71695,0.0,...,0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,0,311,한게임 장기,124,0,0.0,0.0,0.0,420,0.0,...,0,,,,,,,,,
4,0,262,파천일검,125,0,0.0,0.0,0.0,383,0.0,...,0,,,,,,,,,
5,0,320,피망 오목,126,0,0.0,0.0,0.0,213,0.0,...,0,,,,,,,,,
6,0,718,마블히어로즈2016,127,0,0.0,0.0,0.0,202,0.0,...,0,,,,,,,,,


In [21]:
df_accum5 = pd.concat(list_of_df[400000:500000], axis=0) # 커넬이 자꾸 죽어서 나눠서 데이터 프레임화
df_accum6 = pd.concat(list_of_df[500000:], axis=0)

In [23]:
df = pd.concat([df_accum,df_accum2,df_accum3,df_accum4,df_accum5,df_accum6], axis=0)

In [24]:
df

Unnamed: 0,gtrSeq,gameCode,gameName,gameRank,gameRankUpDown,gameShares,gameTypeShares,sharesUpDown,timeCountTotal,preTimeShares,...,sort,stime,etime,sidoCode,sWeek,eWeek,sMonth,eMonth,gameTotalRanks,gameDataType
0,0,545,리그 오브 레전드,1,0,0.0,0.0,0.0,956262,0.0,...,0,,,,,,,,,
1,0,707,오버워치,2,0,0.0,0.0,0.0,266894,0.0,...,0,,,,,,,,,
2,0,1,서든어택,3,0,0.0,0.0,0.0,169841,0.0,...,0,,,,,,,,,
3,0,8,리니지,4,0,0.0,0.0,0.0,106744,0.0,...,0,,,,,,,,,
4,0,4,워크래프트3 리포지드,5,0,0.0,0.0,0.0,71695,0.0,...,0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,0,785,Left 4 Dead 2,64,0,0.0,0.0,0.0,1030,0.0,...,0,,,,,,,,,
4,0,66,거상,65,0,0.0,0.0,0.0,486,0.0,...,0,,,,,,,,,
5,0,79,한게임 라스베가스 포커,66,0,0.0,0.0,0.0,325,0.0,...,0,,,,,,,,,
6,0,26,한게임 7포커,67,0,0.0,0.0,0.0,298,0.0,...,0,,,,,,,,,


In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7533357 entries, 0 to 7
Columns: 105 entries, gtrSeq to gameDataType
dtypes: float64(28), int64(56), object(21)
memory usage: 5.9+ GB


In [29]:
df.to_csv('gamedetail.csv') # csv 파일로 내보내기

In [30]:
dup = df.duplicated()
dup

MemoryError: Unable to allocate 57.5 MiB for an array with shape (7533357,) and data type int64