In [44]:
#Først importeres de relevante packages til at downloade og åbne zipfilen
import zipfile
import urllib.request
import shutil
import pandas as pd

#Hjemmesiden og filnavnet specificeres
url = 'http://www.ices.dk/marine-data/Documents/CatchStats/OfficialNominalCatches.zip'
file_name = 'OfficialNominalCatches.zip'

#Vi "kalder" zip-filen fra url'en vha. urllib.request
with urllib.request.urlopen(url) as response, open(file_name, 'wb') as out_file:
    shutil.copyfileobj(response, out_file)
    with zipfile.ZipFile(file_name) as zf:
        zf.extractall()

In [45]:
# Read data from file 'filename.csv' 
# (in the same directory that your python process is based)
# Control delimiters, rows, column names with read_csv (see later) 
data = pd.read_csv("ICESCatchDataset2006-2016.csv") 

# Preview the first 5 lines of the loaded data 
data.head()

Unnamed: 0,Species,Area,Units,Country,2016,2015,2014,2013,2012,2011,...,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26
0,ANF,27.4,TLW,BE,253.3,200.3,216.9,136.8,132.8,116.4,...,,,,,,,,,,
1,ANF,27.7,TLW,BE,963.4,722.8,491.1,1123.8,1381.5,966.1,...,,,,,,,,,,
2,ANF,27.8,TLW,BE,216.0,181.0,285.4,372.5,201.2,196.0,...,,,,,,,,,,
3,ANF,27,TLW,BE,1432.7,1104.1,993.4,1633.1,1715.5,1278.5,...,,,,,,,,,,
4,ANF,27.4.a,TLW,BE,0.0,0.0,0.0,0.1,0.0,0.0,...,,,,,,,,,,


In [46]:
#rename columns to avoid having only numbers
yearDict = {}
for i in range(2006, 2017): # range goes from 2006 to but not including 2017
    yearDict[str(i)] = f'y{i}' 

data.rename(columns = yearDict, inplace=True)
data.head()

Unnamed: 0,Species,Area,Units,Country,y2016,y2015,y2014,y2013,y2012,y2011,...,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26
0,ANF,27.4,TLW,BE,253.3,200.3,216.9,136.8,132.8,116.4,...,,,,,,,,,,
1,ANF,27.7,TLW,BE,963.4,722.8,491.1,1123.8,1381.5,966.1,...,,,,,,,,,,
2,ANF,27.8,TLW,BE,216.0,181.0,285.4,372.5,201.2,196.0,...,,,,,,,,,,
3,ANF,27,TLW,BE,1432.7,1104.1,993.4,1633.1,1715.5,1278.5,...,,,,,,,,,,
4,ANF,27.4.a,TLW,BE,0.0,0.0,0.0,0.1,0.0,0.0,...,,,,,,,,,,


In [47]:
#Drop columns if all values are NaN
data.dropna(axis="columns", how="all", inplace=True)
data.head()

Unnamed: 0,Species,Area,Units,Country,y2016,y2015,y2014,y2013,y2012,y2011,y2010,y2009,y2008,y2007,y2006
0,ANF,27.4,TLW,BE,253.3,200.3,216.9,136.8,132.8,116.4,131.2,139.6,184.9,181.3,141.1
1,ANF,27.7,TLW,BE,963.4,722.8,491.1,1123.8,1381.5,966.1,720.5,518.2,584.7,1039.5,907.0
2,ANF,27.8,TLW,BE,216.0,181.0,285.4,372.5,201.2,196.0,179.3,195.1,194.3,142.4,144.4
3,ANF,27,TLW,BE,1432.7,1104.1,993.4,1633.1,1715.5,1278.5,1031.0,852.9,963.9,1363.2,1192.5
4,ANF,27.4.a,TLW,BE,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [48]:
#Drop if countries is different from Denmark
data = data[data['Country']=='DK']

data.head(10)

Unnamed: 0,Species,Area,Units,Country,y2016,y2015,y2014,y2013,y2012,y2011,y2010,y2009,y2008,y2007,y2006
3876,AAS,27.3,TLW,DK,0.0,0.0,0.0,0.23,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3877,AAS,27,TLW,DK,0.03,0.0,0.0,0.23,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3878,AAS,27.3.a,TLW,DK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3879,AAS,27.3.c.22,TLW,DK,0.0,0.0,0.0,0.23,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3880,AAS,27.4,TLW,DK,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3881,AAS,27.4.b,TLW,DK,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3882,ABK,27.4,TLW,DK,0.03,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3883,ABK,27,TLW,DK,0.03,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3884,ABK,27.4.b,TLW,DK,0.03,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3885,AFT,27.4,TLW,DK,0.0,2.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [49]:
#Drop if sum of rows is equal to 0
data1 = data.loc[(data!=0).all(axis=0)]
data1.head(10)

IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match