
Commit

Cleanup
ballomud committed May 7, 2015
1 parent fe3a5db commit 4c3da83
Showing 1 changed file with 10 additions and 41 deletions.
51 changes: 10 additions & 41 deletions scraper.py
@@ -43,30 +43,8 @@

#%%

-def ExtractPlayerDF(Data):
-    colNames = ['Date', 'Round', 'Opponent', 'MP', 'GS', 'A', 'CS', 'GC', 'OG', 'PS',
-                'PM', 'YC', 'RC', 'S', 'B', 'ESP', 'BPS', 'NT', 'Value', 'Points']
-    fixtures = Data['fixture_history']['all']
-    playerDF = pandas.DataFrame(fixtures, columns = colNames)
-
-    playerDF['ID'] = Data['id']
-    playerDF['Code'] = Data['code']
-    playerDF['WebName'] = Data['web_name']
-    playerDF['FirstName'] = Data['first_name']
-    playerDF['SecondName'] = Data['second_name']
-    playerDF['Position'] = Data['type_name']
-    playerDF['Team'] = Data['team_name']
-
-    colOrder = ['ID', 'Code', 'Round', 'WebName', 'FirstName', 'SecondName', 'Position', 'Team',
-                'Date', 'Opponent', 'MP', 'GS', 'A', 'CS', 'GC', 'OG', 'PS', 'PM', 'YC',
-                'RC', 'S', 'B', 'ESP', 'BPS', 'NT', 'Value', 'Points']
-
-    return playerDF[colOrder]
-
-#%%
-
## Download dados
-print '[LOG] Downloading Data Started'
+print '[LOG] Download Iniciado'

# Inicialisa browser
br = mechanize.Browser()
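The paginated download loop itself is collapsed in this view, but the variables that reappear in the next hunk (jsonRaw, pgAtual, pgTotal) show the shape of it. A minimal sketch of that kind of loop with mechanize; the URL and the pagination field name below are hypothetical, not the script's actual ones:

import json
import mechanize

br = mechanize.Browser()
br.set_handle_robots(False)            # plain JSON endpoint, skip robots.txt handling

URL = 'https://example.com/mercado/atletas?page=%d'   # hypothetical endpoint

jsonRaw = []
pgAtual = 1
while True:
    resposta = br.open(URL % pgAtual)
    pagina = json.loads(resposta.read())
    jsonRaw.append(pagina)
    pgTotal = pagina['total_paginas']   # hypothetical field carrying the page count
    if pgAtual == pgTotal:
        break
    pgAtual += 1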
@@ -101,25 +79,21 @@ def ExtractPlayerDF(Data):
    if pgAtual == pgTotal:
        break

-print '[LOG] Downloading Data Ended'
+print '[LOG] Download Terminado'

#%%

-## Minera Rodada
-rodada = jsonRaw[0]['rodada_id'] - 1
+## Minera Scouts
+## e concatena em um DataFrame

#%%
+print '[LOG] Processando dados'

# Concatena lista de atletas dos arquivos
atletasJSON = [j['atleta'] for j in jsonRaw]
atletasJSON = list(it.chain(*atletasJSON))

#%%

-## Minera Scouts
-## e concatena em um DataFrame

-print '[LOG] Processing Data Started'
+## Minera Rodada
+rodada = jsonRaw[0]['rodada_id'] - 1

# Scouts
ScoutsDict = []
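The flattening step in the hunk above merges each page's 'atleta' list into one flat list before the scout loop runs. A small self-contained illustration of the it.chain idiom it uses, with made-up data (only the 'atleta' and 'rodada_id' keys appear in the diff; the other keys and values are invented):

import itertools as it

jsonRaw = [
    {'rodada_id': 5, 'atleta': [{'apelido': 'Fulano'}, {'apelido': 'Beltrano'}]},
    {'rodada_id': 5, 'atleta': [{'apelido': 'Sicrano'}]},
]

atletasJSON = [j['atleta'] for j in jsonRaw]    # list of lists, one per downloaded page
atletasJSON = list(it.chain(*atletasJSON))      # flattened: one dict per athlete
print len(atletasJSON)                          # 3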
@@ -142,19 +116,14 @@ def ExtractPlayerDF(Data):
    scoutDict['PartidaData'] = atleta['partida_data']

    ScoutsDict.append(scoutDict)

-#ScoutsDFColOrder = ['Rodada', 'Atleta', 'Apelido', 'Clube', 'Posicao', 'Status', 'Pontos', 'PontosMedia', 'Preco',
-# 'PrecoVariacao', 'Mando', 'PartidaCasa', 'PartidaVisitante', 'PartidaData', 'FS', 'PE', 'A', 'FT',
-# 'FD', 'FF', 'G', 'I', 'PP', 'RB', 'FC', 'GC', 'CA', 'CV', 'SG', 'DD', 'DP', 'GS']
-#ScoutsDF = pd.DataFrame(ScoutsDict, columns = ScoutsDFColOrder)

-print '[LOG] Processing Data Ended'
+print '[LOG] Processamento de dados terminado'

#%%

-## Save DataFrame to SQLite
+## Salva dados para SQLite

-print '[LOG] Transfering data to SQLite format'
+print '[LOG] Salvando dados'

scraperwiki.sqlite.save(unique_keys = ['Atleta', 'Rodada'],
                        data = ScoutsDict)
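This scraperwiki.sqlite.save call is what persists the rows on ScraperWiki/morph.io. With unique_keys set to ['Atleta', 'Rodada'], re-running the scraper for the same round should replace the existing rows rather than append duplicates. A minimal sketch of that behaviour with dummy rows, leaving the table name at the library's default:

import scraperwiki

rows = [
    {'Atleta': 1001, 'Rodada': 5, 'Pontos': 7.3},   # made-up scout rows
    {'Atleta': 1002, 'Rodada': 5, 'Pontos': 2.1},
]
scraperwiki.sqlite.save(unique_keys=['Atleta', 'Rodada'], data=rows)

# Saving again with the same (Atleta, Rodada) pairs overwrites those rows in place.
rows[0]['Pontos'] = 8.0
scraperwiki.sqlite.save(unique_keys=['Atleta', 'Rodada'], data=rows)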

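The commented-out ScoutsDFColOrder / ScoutsDF lines deleted in the last hunk hint at a sink that was never enabled: loading the collected rows into a pandas DataFrame. A sketch of what those two lines would have done, reusing the column order from the deleted comment and a made-up row:

import pandas as pd

# Stand-in for the ScoutsDict list built by the scout loop (values made up).
ScoutsDict = [{'Rodada': 5, 'Atleta': 1001, 'Apelido': 'Fulano', 'Pontos': 7.3}]

ScoutsDFColOrder = ['Rodada', 'Atleta', 'Apelido', 'Clube', 'Posicao', 'Status', 'Pontos',
                    'PontosMedia', 'Preco', 'PrecoVariacao', 'Mando', 'PartidaCasa',
                    'PartidaVisitante', 'PartidaData', 'FS', 'PE', 'A', 'FT', 'FD', 'FF',
                    'G', 'I', 'PP', 'RB', 'FC', 'GC', 'CA', 'CV', 'SG', 'DD', 'DP', 'GS']
ScoutsDF = pd.DataFrame(ScoutsDict, columns=ScoutsDFColOrder)   # columns missing from a row become NaN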