In [None]:
import pandas as pd
import numpy as np
from datetime import datetime,date
from dateutil.relativedelta import relativedelta
import time
from tqdm import tqdm
from dateutil.parser import parse
import regex as re

In [None]:
# Path of the raw CSV file, relative to the working directory.
path = "attacks.csv"
# Open the file as UTF-8; errors="ignore" silently drops any bytes that are not
# valid UTF-8 instead of raising, so undecodable characters are lost from the text.
with open(path,encoding="utf8",errors="ignore") as f:
    sharks_original = pd.read_csv(f)

### Limpieza general

Data cleaning:
- Missing data (Nulos)
- Outliers
- Unnecessary data (repetitivos, duplicados, etc.)
- Inconsistent data (normalizacion, estandarizacion, etc.)

**Primera limpieza**

In [None]:
# Work on a copy so that the frame loaded from disk (sharks_original) stays untouched.
# (A bare `sharks_original` expression in the middle of the cell displayed nothing,
# because only the last expression of a cell is rendered, so it has been removed.)
sharks = sharks_original.copy()

In [None]:
sharks

In [None]:
# Drop the rows in which every single value is null: they carry no information.
sharks = sharks.dropna(how="all")
sharks

In [None]:
# Remove fully duplicated rows, keeping the first occurrence of each one.
sharks = sharks.drop_duplicates(keep="first")

In [None]:
sharks.columns

In [None]:
# Existen columnas que aparentemente solo contienen nulos. Lo compruebo:
sharks["Unnamed: 22"].value_counts()

In [None]:
sharks["Unnamed: 23"].value_counts()

In [None]:
# Summary table with the number and the percentage of nulls found in every column.
dicc = {col: sharks[col].isnull().sum() for col in sharks.columns}
df_null = pd.DataFrame(dicc, index=["Count"]).transpose()
null_share = df_null["Count"] / len(sharks)
df_null["%"] = (null_share * 100).round(1)
df_null.sort_values(by="Count", ascending=False)

In [None]:
# Remove the two columns that were found to be 100% null.
sharks = sharks.drop(columns=["Unnamed: 22","Unnamed: 23"])

In [None]:
sharks

In [None]:
sharks.columns

In [None]:
# Discard the columns that are out of scope for this analysis.
unused_columns = ["Location","Investigator or Source","pdf","href formula",'href','original order']
sharks = sharks.drop(columns=unused_columns)

In [None]:
(sharks["Case Number.1"]==sharks["Case Number.2"]).value_counts()

### Normalización de columnas

In [None]:
sharks.columns

In [None]:
# Rename every column to a short lowercase identifier.
# The assignment is positional: the list must hold exactly one name per remaining
# column, in the current column order, otherwise pandas raises a length-mismatch error.
sharks.columns=['case', 'date', 'year', 'type', 'country','area','activity', 'name',
       'sex', 'age', 'injury', 'fatal', 'time', 'species',
       'case_1', 'case_2']

In [None]:
sharks

In [None]:
sharks.to_csv("sharks.csv")

# Limpieza de fechas

In [None]:
list(sharks.date)

## Método 1

In [None]:
# Build an independent frame (so the original is never modified) that holds
# only the columns under study.
df_dates = sharks[["case","date","year"]].copy()

In [None]:
# Standardise the separators: every whitespace, "/", "_" or "-" becomes ".".
# The study focuses on the "case" column, which carries more information about the date than the others.
separator = re.compile(r"[\s/_-]")
df_dates["case"] = [separator.sub(".", str(value)) for value in df_dates.case]

In [None]:
# Convert every "case" value to a datetime, trying three levels of precision in turn:
# full date (year.month.day), then year.month, then the year alone.
# Each stage joins all the regex matches found in the value and hands the result to strptime;
# a ValueError (nothing matched, or text that strptime cannot parse) moves on to the next stage.
# The year-only stage stores the year as an int instead of a datetime, and values that
# fail every stage become NaN.

date_stages = [
    (r"(?:1[5-9]\d{2}|20[0-2][0-9]|2022)[-/.](?:0[1-9]|1[012])[-/.](?:0[1-9]|[12][0-9]|3[01])", "%Y.%m.%d"),
    (r"(?:1[5-9]\d{2}|20[0-2][0-9]|2022)[-/.](?:0[1-9]|1[012])", "%Y.%m"),
    (r"(?:1[5-9]\d{2}|20[0-2][0-9]|2022)", "%Y"),
]

for label in tqdm(df_dates.index):
    raw_case = str(df_dates["case"][label])
    converted = np.nan
    for pattern, date_format in date_stages:
        try:
            stamp = datetime.strptime("".join(re.findall(pattern, raw_case)), date_format)
        except ValueError:
            continue
        # Only the year-only format keeps the bare year (int) instead of the datetime.
        converted = stamp.year if date_format == "%Y" else stamp
        break
    df_dates.loc[label, "case"] = converted

# Ahora tenemos la columna case con valores tipo datetime que podemos manipular

In [None]:
# Extract the year into its own column so it can be compared with the existing "year" column:
# datetimes contribute their year, ints are already a year, anything else becomes NaN.
new_year_2 = [
    value.year if isinstance(value, datetime)
    else value if isinstance(value, int)
    else np.nan
    for value in df_dates.case
]

df_dates["year_2"] = new_year_2

# Count in how many rows the two year columns agree or differ.

(df_dates["year_2"] == df_dates["year"]).value_counts()

# Collect the rows whose years differ as records: (index label, year, year_2, date).

mismatches = [
    (label, df_dates["year"][label], df_dates["year_2"][label], df_dates["date"][label])
    for label in df_dates.index
    if df_dates["year_2"][label] != df_dates["year"][label]
]

# Turn the records into a dataframe with named columns so it is easier to handle.

year_compr = pd.DataFrame(mismatches, columns=["indice","year1","year2","comment"])
year_compr

In [None]:
# Decide which year to keep for the rows where "year" and "year_2" disagree.
# The year is captured from the "date" column with a regex (years 1500 to 2029).
# If it equals "year", that value is copied into "year_2"; if it equals "year_2",
# the row is left as it is; otherwise "year_2" is set to NaN.

year_pattern = r"(?:1[5-9]\d{2}|20[0-2][0-9]|2022)"

for label in year_compr["indice"]:
    year_in_date = "".join(re.findall(year_pattern, str(df_dates.date[label])))

    if str(df_dates.year[label])[0:4] == year_in_date:
        df_dates.loc[label, "year_2"] = df_dates["year"][label]
    elif str(df_dates.year_2[label])[0:4] != year_in_date:
        df_dates.loc[label, "year_2"] = np.nan

# Rebuild the list of rows in which "year" and "year_2" still differ.

year_compr = [
    (label, df_dates["year"][label], df_dates["year_2"][label], df_dates["date"][label])
    for label in df_dates.index
    if df_dates["year_2"][label] != df_dates["year"][label]
]

len(year_compr)

# Hemos rescatado 5 años

In [None]:
# Finalmente obtenemos las columnas con el año completo y el año limpias

df_dates[["case","year_2"]]

### Funciones

In [None]:
# Building blocks of the date patterns: years 1500-2029, months 01-12, days 01-31.
_YEAR_RE = r"(?:1[5-9]\d{2}|20[0-2][0-9])"
_MONTH_RE = r"(?:0[1-9]|1[012])"
_DAY_RE = r"(?:0[1-9]|[12][0-9]|3[01])"

# (pattern, strptime format) pairs, ordered from the finest to the coarsest precision.
# The patterns expect "." as the only separator (date_clean normalises it beforehand).
_DATE_STAGES = (
    (_YEAR_RE + r"\." + _MONTH_RE + r"\." + _DAY_RE, "%Y.%m.%d"),
    (_YEAR_RE + r"\." + _MONTH_RE, "%Y.%m"),
    (_YEAR_RE, "%Y"),
)


def _parse_partial_date(text):
    """Return the first date found in ``text`` at the finest precision that parses.

    A full date or a year.month value gives a ``datetime`` (day 1 when the day is
    missing), a lone year gives an ``int`` and no usable year gives ``np.nan``.
    """
    for pattern, date_format in _DATE_STAGES:
        found = re.search(pattern, text)
        if found is None:
            continue
        try:
            parsed = datetime.strptime(found.group(0), date_format)
        except ValueError:
            # The text fits the pattern but is not a real date (e.g. "2018.02.30"):
            # retry with the next, coarser precision.
            continue
        return parsed.year if date_format == "%Y" else parsed
    return np.nan


def date_clean(df,column):
    """
    Convert the values of ``df[column]`` to dates.

    Every value is turned into text, its separators (whitespace, "/", "_", "-") are
    replaced by "." and the first date found is parsed with the finest precision
    available: year.month.day, then year.month, then the year alone.

    Only the first match of each pattern is used, so a value that contains more than
    one date is parsed from its first date instead of being discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds the column. It is modified in place: ``df[column]`` is
        replaced by the converted values (object dtype).
    column : str
        Name of the column to clean.

    Returns
    -------
    pandas.Series
        ``df[column]`` after the conversion: ``datetime`` objects where at least a
        year and a month were found, the year as an ``int`` where only a year was
        found, and ``np.nan`` where nothing could be parsed.
    """
    normalised = [re.sub(r"[\s/_-]", ".", str(value)) for value in df[column]]
    parsed = [_parse_partial_date(text) for text in tqdm(normalised)]

    # dtype=object keeps datetimes, ints and NaN side by side without coercion and
    # assigns the whole column at once instead of writing cell by cell.
    df[column] = pd.Series(parsed, index=df.index, dtype=object)

    return df[column]

def new_year(column):
    """
    Extract the year from every value of ``column``.

    ``datetime`` values contribute their ``.year``, plain ``int`` values are taken
    as years unchanged and anything else yields ``np.nan``.
    Returns a list with one entry per value of ``column``.
    """

    def year_of(value):
        if isinstance(value, datetime):
            return value.year
        return value if isinstance(value, int) else np.nan

    return [year_of(value) for value in tqdm(column)]
                        
def year_clean(df,year_1,year_2,compr_column):
    """
    ----DEPRECATED----
    Pick a year for every row in which two year columns disagree.

    ``df`` is the dataframe, ``year_1`` and ``year_2`` are the names of the two year
    columns to compare and ``compr_column`` is the name of the column used as the
    checking criterion.
    For each row where ``df[year_2] != df[year_1]`` the year captured by regex from
    ``compr_column`` is compared with the first four characters of each candidate:
    the ``year_1`` value is kept if it matches, else the ``year_2`` value if it
    matches, else ``np.nan``.

    Returns a list with one entry per differing row only; rows whose years agree
    are skipped, so the list does not keep the length of the dataframe.
    ----TODO: make it update the given column while keeping its length----
    """
    year_regex = r"(?:1[5-9]\d{2}|20[0-2][0-9]|2022)"
    first, second, reference = df[year_1], df[year_2], df[compr_column]

    differing = [label for label in df.index if second[label] != first[label]]

    chosen = []
    for label in tqdm(differing):
        expected = "".join(re.findall(year_regex, str(reference[label])))
        if str(first[label])[0:4] == expected:
            chosen.append(first[label])
        elif str(second[label])[0:4] == expected:
            chosen.append(second[label])
        else:
            chosen.append(np.nan)

    return chosen

In [None]:
df_dates["case"] = date_clean(df_dates,"case")

In [None]:
df_dates["nuevo_año"] = new_year(df_dates["case"])

In [None]:
list(year_clean(df_dates,"year","nuevo_año","date"))

## Método 2 -DEPRECATED-

In [None]:
# La intencion de esta funcion es que acepte cualquier tipo de fecha, para que independientemente del formato que
# le pases, te lo pueda convertir en datetime

# ---INACABADA---

In [None]:
# Independent frame with only the columns under study.
df_dates_2 = sharks[["case","date","year"]].copy()

In [None]:
# Accepted date formats, listed in the order in which they are meant to be tried.
formats = [
    # day, month, year
    "%d%m%Y", "%d%b%Y", "%d%B%Y",
    "%d%m%y", "%d%b%y", "%d%B%y",
    # month, year
    "%m%Y", "%m%y", "%b%Y", "%b%y", "%B%Y", "%B%y",
    # year only
    "%Y", "%y",
    # year, month, day
    "%Y%m%d", "%Y%b%d", "%Y%B%d",
    "%y%m%d", "%y%b%d", "%y%B%d",
]

In [None]:
# Strip every separator (".", "-", "/", whitespace and "_") so that only the date digits remain.
# The hyphen is escaped on purpose: written as "[.-/...]" it defines the character range
# from "." to "/", and a literal "-" would never be removed.
df_dates_2["case"]=[re.sub(r"[.\-/\s_]","",str(i)) for i in df_dates_2.case]
# Keep only the YYYYMMDD digit sequences found in each value (joined together; "" when there is none).
df_dates_2["case"]=["".join(re.findall(r"(?:1[6-9]\d{2}|20[0-2][0-9]|2022)(?:0[1-9]|1[012])(?:0[1-9]|[12][0-9]|3[01])",str(i))) for i in df_dates_2.case]

In [None]:
# Try every accepted format, in list order, on each "case" value and keep the first one
# that parses, storing the result as a date. Values that match no format are left unchanged.
# NOTE(review): the day-first formats are tried before "%Y%m%d", so an 8-digit
# year-month-day string can be consumed by "%d%m%Y" first (e.g. "19051231" parses as
# 19 May 1231) -- confirm the intended priority of the formats.
for i in df_dates_2.index:
    for f in formats:
        try:
            df_dates_2.loc[i,"case"]=datetime.strptime(str(df_dates_2.case[i]),f).date()
            break
        except ValueError:
            pass

In [None]:
df_dates_2.case.value_counts()

# Limpieza edades

In [None]:
list(sharks["age"][4000:5000])

In [None]:
# Una manera es pasar todo lo que se pueda a int capturando el error. Si da error ponemos nulo.

In [None]:
# Independent frame with just the columns needed for the age and country clean-up.
age_test = sharks[["age","country"]].copy()
age_test

In [None]:
def _age_as_int(raw_age):
    """Return the age as an int, or NaN when its text is not a plain integer."""
    try:
        return int(str(raw_age).strip())
    except ValueError:
        return np.nan

new_age = [_age_as_int(raw_age) for raw_age in age_test["age"]]
age_test["age"] = new_age

# Limpieza de paises

In [None]:
age_test["country"].str.upper().str.strip().unique()

In [None]:
from country_list import available_languages
from country_list import countries_for_language

In [None]:
import pycountry as pc

In [None]:
pc.countries.index_names

In [None]:
countries_list = {
"AF":"Afghanistan",
"AX":"Aland Islands",
"AL":"Albania",
"DZ":"Algeria",
"AS":"American Samoa",
"AD":"Andorra",
"AO":"Angola",
"AI":"Anguilla",
"AQ":"Antarctica",
"AG":"Antigua and Barbuda",
"AR":"Argentina",
"AM":"Armenia",
"AW":"Aruba",
"AU":"Australia",
"AT":"Austria",
"AZ":"Azerbaijan",
"BS":"Bahamas",
"BH":"Bahrain",
"BD":"Bangladesh",
"BB":"Barbados",
"BY":"Belarus",
"BE":"Belgium",
"BZ":"Belize",
"BJ":"Benin",
"BM":"Bermuda",
"BT":"Bhutan",
"BO":"Bolivia, Plurinational State of",
"BQ":"Bonaire, Sint Eustatius and Saba",
"BA":"Bosnia and Herzegovina",
"BW":"Botswana",
"BV":"Bouvet Island",
"BR":"Brazil",
"IO":"British Indian Ocean Territory",
"BN":"Brunei Darussalam",
"BG":"Bulgaria",
"BF":"Burkina Faso",
"BI":"Burundi",
"KH":"Cambodia",
"CM":"Cameroon",
"CA":"Canada",
"CV":"Cape Verde",
"KY":"Cayman Islands",
"CF":"Central African Republic",
"TD":"Chad",
"CL":"Chile",
"CN":"China",
"CX":"Christmas Island",
"CC":"Cocos (Keeling) Islands",
"CO":"Colombia",
"KM":"Comoros",
"CG":"Congo",
"CD":"Congo, The Democratic Republic of the",
"CK":"Cook Islands",
"CR":"Costa Rica",
"CI":"Côte d'Ivoire",
"HR":"Croatia",
"CU":"Cuba",
"CW":"Curaçao",
"CY":"Cyprus",
"CZ":"Czech Republic",
"DK":"Denmark",
"DJ":"Djibouti",
"DM":"Dominica",
"DO":"Dominican Republic",
"EC":"Ecuador",
"EG":"Egypt",
"SV":"El Salvador",
"GQ":"Equatorial Guinea",
"ER":"Eritrea",
"EE":"Estonia",
"ET":"Ethiopia",
"FK":"Falkland Islands (Malvinas)",
"FO":"Faroe Islands",
"FJ":"Fiji",
"FI":"Finland",
"FR":"France",
"GF":"French Guiana",
"PF":"French Polynesia",
"TF":"French Southern Territories",
"GA":"Gabon",
"GM":"Gambia",
"GE":"Georgia",
"DE":"Germany",
"GH":"Ghana",
"GI":"Gibraltar",
"GR":"Greece",
"GL":"Greenland",
"GD":"Grenada",
"GP":"Guadeloupe",
"GU":"Guam",
"GT":"Guatemala",
"GG":"Guernsey",
"GN":"Guinea",
"GW":"Guinea-Bissau",
"GY":"Guyana",
"HT":"Haiti",
"HM":"Heard Island and McDonald Islands",
"VA":"Holy See (Vatican City State)",
"HN":"Honduras",
"HK":"Hong Kong",
"HU":"Hungary",
"IS":"Iceland",
"IN":"India",
"ID":"Indonesia",
"IR":"Iran, Islamic Republic of",
"IQ":"Iraq",
"IE":"Ireland",
"IM":"Isle of Man",
"IL":"Israel",
"IT":"Italy",
"JM":"Jamaica",
"JP":"Japan",
"JE":"Jersey",
"JO":"Jordan",
"KZ":"Kazakhstan",
"KE":"Kenya",
"KI":"Kiribati",
"KP":"Korea, Democratic People's Republic of",
"KR":"Korea, Republic of",
"KW":"Kuwait",
"KG":"Kyrgyzstan",
"LA":"Lao People's Democratic Republic",
"LV":"Latvia",
"LB":"Lebanon",
"LS":"Lesotho",
"LR":"Liberia",
"LY":"Libya",
"LI":"Liechtenstein",
"LT":"Lithuania",
"LU":"Luxembourg",
"MO":"Macao",
"MK":"Macedonia, Republic of",
"MG":"Madagascar",
"MW":"Malawi",
"MY":"Malaysia",
"MV":"Maldives",
"ML":"Mali",
"MT":"Malta",
"MH":"Marshall Islands",
"MQ":"Martinique",
"MR":"Mauritania",
"MU":"Mauritius",
"YT":"Mayotte",
"MX":"Mexico",
"FM":"Micronesia, Federated States of",
"MD":"Moldova, Republic of",
"MC":"Monaco",
"MN":"Mongolia",
"ME":"Montenegro",
"MS":"Montserrat",
"MA":"Morocco",
"MZ":"Mozambique",
"MM":"Myanmar",
"NA":"Namibia",
"NR":"Nauru",
"NP":"Nepal",
"NL":"Netherlands",
"NC":"New Caledonia",
"NZ":"New Zealand",
"NI":"Nicaragua",
"NE":"Niger",
"NG":"Nigeria",
"NU":"Niue",
"NF":"Norfolk Island",
"MP":"Northern Mariana Islands",
"NO":"Norway",
"OM":"Oman",
"PK":"Pakistan",
"PW":"Palau",
"PS":"Palestinian Territory, Occupied",
"PA":"Panama",
"PG":"Papua New Guinea",
"PY":"Paraguay",
"PE":"Peru",
"PH":"Philippines",
"PN":"Pitcairn",
"PL":"Poland",
"PT":"Portugal",
"PR":"Puerto Rico",
"QA":"Qatar",
"RE":"Réunion",
"RO":"Romania",
"RU":"Russian Federation",
"RW":"Rwanda",
"BL":"Saint Barthélemy",
"SH":"Saint Helena, Ascension and Tristan da Cunha",
"KN":"Saint Kitts and Nevis",
"LC":"Saint Lucia",
"MF":"Saint Martin (French part)",
"PM":"Saint Pierre and Miquelon",
"VC":"Saint Vincent and the Grenadines",
"WS":"Samoa",
"SM":"San Marino",
"ST":"Sao Tome and Principe",
"SA":"Saudi Arabia",
"SN":"Senegal",
"RS":"Serbia",
"SC":"Seychelles",
"SL":"Sierra Leone",
"SG":"Singapore",
"SX":"Sint Maarten (Dutch part)",
"SK":"Slovakia",
"SI":"Slovenia",
"SB":"Solomon Islands",
"SO":"Somalia",
"ZA":"South Africa",
"GS":"South Georgia and the South Sandwich Islands",
"ES":"Spain",
"LK":"Sri Lanka",
"SD":"Sudan",
"SR":"Suriname",
"SS":"South Sudan",
"SJ":"Svalbard and Jan Mayen",
"SZ":"Swaziland",
"SE":"Sweden",
"CH":"Switzerland",
"SY":"Syrian Arab Republic",
"TW":"Taiwan, Province of China",
"TJ":"Tajikistan",
"TZ":"Tanzania, United Republic of",
"TH":"Thailand",
"TL":"Timor-Leste",
"TG":"Togo",
"TK":"Tokelau",
"TO":"Tonga",
"TT":"Trinidad and Tobago",
"TN":"Tunisia",
"TR":"Turkey",
"TM":"Turkmenistan",
"TC":"Turks and Caicos Islands",
"TV":"Tuvalu",
"UG":"Uganda",
"UA":"Ukraine",
"AE":"United Arab Emirates",
"GB":"United Kingdom",
"US":"United States",
"UM":"United States Minor Outlying Islands",
"UY":"Uruguay",
"UZ":"Uzbekistan",
"VU":"Vanuatu",
"VE":"Venezuela, Bolivarian Republic of",
"VN":"Viet Nam",
"VG":"Virgin Islands, British",
"VI":"Virgin Islands, U.S.",
"WF":"Wallis and Futuna",
"YE":"Yemen",
"ZM":"Zambia",
"ZW":"Zimbabwe"
}

In [None]:
paises = countries_list.values()

In [None]:
# Check which reference country names appear in the dataset, comparing in upper case.
# The normalised set of dataset countries is built once, before the loop, instead of
# being recomputed for every reference name.
dataset_countries = set(age_test["country"].str.upper().str.strip().unique())
for i in paises:
    if i.upper() in dataset_countries:
        print("yes")

In [None]:
# [MESES,AÑOS,COUNTRIES,AGES,FATAL,SEX]

# Nº ATAQUES POR AÑO //
# Nº ATAQUES POR MESES -> POR ESTACIONES?
# Nº ATAQUES POR COUNTRY
# Nº ATAQUES POR AGE
# Nº ATAQUES POR SEX
# % SUPERVIVENCIA
    # % DE SUPERVIVENCIA EN MUJERES Y HOMBRES
    # % DE SUPERVIVENCIA POR FRANJA DE EDAD

# PEOR CASO

In [None]:
# Quedarnos con provoked, unprovoked y nulos
sharks["type"].value_counts()

In [None]:
# Definir nosotros las actividades que queremos analizar y el resto fuera
list(sharks["activity"].unique())

In [None]:
# Hacer match con una lista de especies posibles
list(sharks["species"].unique())

In [None]:
            
            Carcharhinus acronotus : Blacknose shark
            Carcharhinus albimarginatus : Silvertip shark
            Carcharhinus altimus : Bignose shark
            Carcharhinus amblyrhynchoides : Graceful shark
            Carcharhinus amblyrhynchos : Grey reef shark
            Carcharhinus amboinensis : Pigeye shark
            Carcharhinus borneensis : Borneo shark
            Carcharhinus brachyurus : Copper shark
            Carcharhinus brevipinna : Spinner shark
            Carcharhinus cautus : Nervous shark
            Carcharhinus coatesi : Coates's shark
            Carcharhinus dussumieri : Whitecheek shark
            Carcharhinus falciformis : Silky shark
            Carcharhinus fitzroyensis : Creek whaler
            Carcharhinus galapagensis : Galapagos shark
            Carcharhinus hemiodon : Pondicherry shark
            Carcharhinus humani : Human's whaler shark
            Carcharhinus isodon : Finetooth shark
            Carcharhinus leiodon : Smoothtooth blacktip shark
            Carcharhinus leucas : Bull shark
            Carcharhinus limbatus : Blacktip shark
            Carcharhinus longimanus : Oceanic whitetip shark
            Carcharhinus macloti : Hardnose shark
            Carcharhinus melanopterus : Blacktip reef shark
            Carcharhinus obscurus : Dusky shark
            Carcharhinus perezi : Caribbean reef shark
            Carcharhinus plumbeus : Sandbar shark
            Carcharhinus porosus : Smalltail shark
            Carcharhinus sealei : Blackspot shark
            Carcharhinus signatus : Night shark
            Carcharhinus sorrah : Spot-tail shark
            Carcharhinus tilstoni : Australian blacktip shark
            Carcharhinus tjutjot : Indonesian whaler shark
            Galeocerdo cuvier : Tiger shark
            Glyphis fowlerae : Borneo river shark
            Glyphis gangeticus : Ganges shark
            Glyphis garricki : Northern river shark
            Glyphis glyphis : Speartooth shark
            Glyphis siamensis : Irrawaddy river shark
            Isogomphodon oxyrhynchus : Daggernose shark
            Lamiopsis temminckii : Broadfin shark
            Lamiopsis tephrodes : Borneo broadfin shark
            Loxodon macrorhinus : Sliteye shark
            Nasolamia velox : Whitenose shark
            Negaprion acutidens : Sicklefin lemon shark
            Negaprion brevirostris : Lemon shark
            Prionace glauca : Blue shark
            Rhizoprionodon acutus : Milk shark
            Rhizoprionodon lalandii : Brazilian sharpnose shark
            Rhizoprionodon longurio : Pacific sharpnose shark
            Rhizoprionodon oligolinx : Grey sharpnose shark
            Rhizoprionodon porosus : Caribbean sharpnose shark
            Rhizoprionodon taylori : Australian sharpnose shark
            Rhizoprionodon terraenovae : Atlantic sharpnose shark
            Scoliodon laticaudus : Spadenose shark
            Scoliodon macrorhynchos : Pacific spadenose shark
            Chaenogaleus macrostoma : Hooktooth shark
            Hemigaleus australiensis : Australian weasel shark
            Hemigaleus microstoma : Sicklefin weasel shark
            Hemipristis elongata : Snaggletooth shark
            Paragaleus leucolomatus : Whitetip weasel shark
            Paragaleus pectoralis : Atlantic weasel shark
            Paragaleus randalli : Slender weasel shark
            Paragaleus tengi : Straight-tooth weasel shark
            Leptocharias smithii : Barbeled houndshark
            Ctenacis fehlmanni : Harlequin catshark
            Eridacnis barbouri : Cuban ribbontail catshark
            Eridacnis radcliffei : Pygmy ribbontail catshark
            Eridacnis sinuans : African ribbontail catshark
            Proscyllium habereri : Graceful catshark
            Proscyllium magnificum : Magnificent catshark
            Gollum attenuatus : [No common name]
            Gollum suluensis : [No common name]
            Planonasus parini : Dwarf false catshark
            Pseudotriakis microdon : False catshark
            Apristurus albisoma : White-bodied catshark
            Apristurus ampliceps : Roughskin catshark
            Apristurus aphyodes : White ghost catshark
            Apristurus australis : Pinocchio catshark
            Apristurus brunneus : Brown catshark
            Apristurus bucephalus : Bighead catshark
            Apristurus canutus : Hoary catshark
            Apristurus exsanguis : Flaccid catshark
            Apristurus fedorovi : Fedorov's catshark
            Apristurus garricki : Garrick's catshark
            Apristurus gibbosus : Humpback catshark
            Apristurus herklotsi : Longfin catshark
            Apristurus indicus : Smallbelly catshark
            Apristurus internatus : Shortnose demon catshark
            Apristurus investigatoris : Broadnose catshark
            Apristurus japonicus : Japanese catshark
            Apristurus kampae : Longnose catshark
            Apristurus laurussonii : Iceland catshark
            Apristurus longicephalus : Longhead catshark
            Apristurus macrorhynchus : Flathead catshark
            Apristurus macrostomus : Broadmouth catshark
            Apristurus manis : Ghost catshark
            Apristurus melanoasper : Black roughscale catshark
            Apristurus microps : Smalleye catshark
            Apristurus micropterygeus : Smalldorsal catshark
            Apristurus nakayai : Milk-eye catshark
            Apristurus nasutus : Largenose catshark
            Apristurus parvipinnis : Smallfin catshark
            Apristurus pinguis : Fat catshark
            Apristurus platyrhynchus : Spatulasnout catshark
            Apristurus profundorum : Deep-water catshark
            Apristurus riveri : Broadgill catshark
            Apristurus saldanha : Saldanha catshark
            Apristurus sibogae : Pale catshark
            Apristurus sinensis : South China catshark
            Apristurus spongiceps : Spongehead catshark
            Apristurus stenseni : Panama ghost catshark
            Asymbolus analis : Australian spotted catshark
            Asymbolus funebris : Blotched catshark
            Asymbolus galacticus : Starry catshark
            Asymbolus occiduus : Western spotted catshark
            Asymbolus pallidus : Pale spotted catshark
            Asymbolus parvus : Dwarf catshark
            Asymbolus rubiginosus : Orange spotted catshark
            Asymbolus submaculatus : Variegated catshark
            Asymbolus vincenti : Gulf catshark
            Atelomycterus baliensis : Bali catshark
            Atelomycterus fasciatus : Banded sand catshark
            Atelomycterus macleayi : Australian marbled catshark
            Atelomycterus marmoratus : Coral catshark
            Atelomycterus marnkalha : Eastern banded catshark
            Aulohalaelurus kanakorum : New Caledonia catshark
            Aulohalaelurus labiosus : Australian blackspotted catshark
            Bythaelurus canescens : Dusky catshark
            Bythaelurus clevai : Broadhead catshark
            Bythaelurus dawsoni : New Zealand catshark
            Bythaelurus giddingsi : Galápagos catshark
            Bythaelurus hispidus : Bristly catshark
            Bythaelurus immaculatus : Spotless catshark
            Bythaelurus incanus : Sombre catshark
            Bythaelurus lutarius : Mud catshark
            Cephaloscyllium albipinnum : Whitefin swellshark
            Cephaloscyllium cooki : Cook's swellshark
            Cephaloscyllium fasciatum : Reticulated swellshark
            Cephaloscyllium formosanum : Formosa swellshark
            Cephaloscyllium hiscosellum : Australian reticulate swellshark
            Cephaloscyllium isabellum : Draughtsboard shark
            Cephaloscyllium laticeps : Australian swellshark
            Cephaloscyllium pictum : Painted swellshark
            Cephaloscyllium sarawakensis : Sarawak pygmy swellshark
            Cephaloscyllium signourum : Flagtail swellshark
            Cephaloscyllium silasi : Indian swellshark
            Cephaloscyllium speccum : Speckled swellshark
            Cephaloscyllium stevensi : Steven's swellshark
            Cephaloscyllium sufflans : Balloon shark
            Cephaloscyllium umbratile : Blotchy swellshark
            Cephaloscyllium variegatum : Saddled swellshark
            Cephaloscyllium ventriosum : Swellshark
            Cephaloscyllium zebrum : Narrowbar swellshark
            Cephalurus cephalus : Lollipop catshark
            Figaro boardmani : Australian sawtail catshark
            Figaro striatus : Northern sawtail catshark
            Galeus antillensis : Antilles catshark
            Galeus arae : Roughtail catshark
            Galeus atlanticus : Atlantic sawtail catshark
            Galeus cadenati : Longfin sawtail catshark
            Galeus eastmani : Gecko catshark
            Galeus gracilis : Slender sawtail catshark
            Galeus longirostris : Longnose sawtail catshark
            Galeus melastomus : Blackmouth catshark
            Galeus mincaronei : Southern sawtail catshark
            Galeus murinus : Mouse catshark
            Galeus nipponensis : Broadfin sawtail catshark
            Galeus piperatus : Peppered catshark
            Galeus polli : African sawtail catshark
            Galeus priapus : Phallic catshark
            Galeus sauteri : Blacktip sawtail catshark
            Galeus schultzi : Dwarf sawtail catshark
            Galeus springeri : Springer's sawtail catshark
            Halaelurus boesemani : Speckled catshark
            Halaelurus buergeri : Blackspotted catshark
            Halaelurus lineatus : Lined catshark
            Halaelurus maculosus : Indonesian speckled catshark
            Halaelurus natalensis : Tiger catshark
            Halaelurus quagga : Quagga catshark
            Halaelurus sellus : Rusty catshark
            Haploblepharus edwardsii : Puffadder shyshark
            Haploblepharus fuscus : Brown shyshark
            Haploblepharus kistnasamyi : Natal shyshark
            Haploblepharus pictus : Dark shyshark
            Holohalaelurus favus : Honeycomb Izak
            Holohalaelurus grennian : Grinning Izak
            Holohalaelurus melanostigma : Crying Izak
            Holohalaelurus punctatus : White-spotted Izak
            Holohalaelurus regani : Izak catshark
            Parmaturus albimarginatus : White-tip catshark
            Parmaturus albipenis : White-clasper catshark
            Parmaturus bigus : Beige catshark
            Parmaturus campechiensis : Campeche catshark
            Parmaturus lanatus : Velvet catshark
            Parmaturus macmillani : McMillan's cat shark
            Parmaturus melanobranchus : Blackgill catshark
            Parmaturus pilosus : Salamander shark
            Parmaturus xaniurus : Filetail catshark
            Pentanchus profundicolus : Onefin catshark
            Poroderma africanum : Pyjama shark
            Poroderma pantherinum : Leopard catshark
            Schroederichthys bivius : Narrowmouthed catshark
            Schroederichthys chilensis : Redspotted catshark
            Schroederichthys maculatus : Narrowtail catshark
            Schroederichthys saurisqualus : Lizard catshark
            Schroederichthys tenuis : Slender catshark
            Scyliorhinus besnardi : Polkadot catshark
            Scyliorhinus boa : Boa catshark
            Scyliorhinus canicula : Small-spotted catshark
            Scyliorhinus capensis : Yellow-spotted catshark
            Scyliorhinus cervigoni : West African catshark
            Scyliorhinus comoroensis : Comoro catshark
            Scyliorhinus garmani : Brownspotted catshark
            Scyliorhinus haeckelii : Freckled catshark
            Scyliorhinus hesperius : White-saddled catshark
            Scyliorhinus meadi : Blotched catshark
            Scyliorhinus retifer : Chain catshark
            Scyliorhinus stellaris : Nursehound
            Scyliorhinus tokubee : Izu catshark
            Scyliorhinus torazame : Cloudy catshark
            Scyliorhinus torrei : Dwarf catshark
            Eusphyra blochii : Winghead shark
            Sphyrna corona : Scalloped bonnethead
            Sphyrna gilberti : Carolina hammerhead
            Sphyrna lewini : Scalloped hammerhead
            Sphyrna media : Scoophead
            Sphyrna mokarran : Great hammerhead
            Sphyrna tiburo : Bonnethead
            Sphyrna tudes : Smalleye hammerhead
            Sphyrna zygaena : Smooth hammerhead
            Furgaleus macki : Whiskery shark
            Galeorhinus galeus : Tope shark
            Gogolia filewoodi : Sailback houndshark
            Hemitriakis abdita : Deepwater sicklefin houndshark
            Hemitriakis complicofasciata : Ocellate topeshark
            Hemitriakis falcata : Sicklefin houndshark
            Hemitriakis indroyonoi : Indonesian houndshark
            Hemitriakis japanica : Japanese topeshark
            Hemitriakis leucoperiptera : Whitefin topeshark
            Hypogaleus hyugaensis : Blacktip tope
            Iago garricki : Longnose houndshark
            Iago mangalorensis : Mangalore houndshark
            Iago omanensis : Bigeye houndshark
            Iago sp.B : Bengal smallgill houndshark
            Mustelus albipinnis : White-margin fin houndshark
            Mustelus antarcticus : Gummy shark
            Mustelus asterias : Starry smooth-hound
            Mustelus californicus : Grey smooth-hound
            Mustelus canis : Dusky smooth-hound
            Mustelus dorsalis : Sharptooth smooth-hound
            Mustelus fasciatus : Striped smooth-hound
            Mustelus griseus : Spotless smooth-hound
            Mustelus henlei : Brown smooth-hound
            Mustelus higmani : Smalleye smooth-hound
            Mustelus lenticulatus : Spotted estuary smooth-hound
            Mustelus lunulatus : Sicklefin smooth-hound
            Mustelus manazo : Starspotted smooth-hound
            Mustelus mento : Speckled smooth-hound
            Mustelus minicanis : Dwarf smooth-hound
            Mustelus mosis : Arabian smooth-hound
            Mustelus mustelus : Common smooth-hound
            Mustelus norrisi : Narrowfin smooth-hound
            Mustelus palumbes : Whitespotted smooth-hound
            Mustelus punctulatus : Blackspotted smooth-hound
            Mustelus ravidus : Australian grey smooth-hound
            Mustelus schmitti : Narrownose smooth-hound
            Mustelus sinusmexicanus : Gulf smoothhound
            Mustelus stevensi : Western spotted gummy shark
            Mustelus walkeri : Eastern spotted gummy shark
            Mustelus whitneyi : Humpback smooth-hound
            Mustelus widodoi : White-fin smooth-hound
            Scylliogaleus quecketti : Flapnose houndshark
            Triakis acutipinna : Sharpfin houndshark
            Triakis maculata : Spotted houndshark
            Triakis megalopterus : Sharptooth houndshark
            Triakis scyllium : Banded houndshark
            Triakis semifasciata : Leopard shark
            Echinorhinus brucus : Bramble shark
            Echinorhinus cookei : Prickly shark
            Heterodontus francisci : Horn shark
            Heterodontus galeatus : Crested bullhead shark
            Heterodontus japonicus : Japanese bullhead shark
            Heterodontus mexicanus : Mexican hornshark
            Heterodontus omanensis : Oman bullhead shark
            Heterodontus portusjacksoni : Port Jackson shark
            Heterodontus quoyi : Galapagos bullhead shark
            Heterodontus ramalheira : Whitespotted bullhead shark
            Heterodontus zebra : Zebra bullhead shark
            Chlamydoselachus africana : Southern African frilled shark
            Chlamydoselachus anguineus : Frilled shark
            Heptranchias perlo : Sharpnose sevengill shark
            Hexanchus griseus : Bluntnose sixgill shark
            Hexanchus nakamurai : Bigeyed sixgill shark
            Notorynchus cepedianus : Broadnose sevengill shark
            Alopias pelagicus : Pelagic thresher
            Alopias superciliosus : Bigeye thresher
            Alopias vulpinus : Common thresher
            Cetorhinus maximus : Basking shark
            Carcharodon carcharias : Great white shark
            Isurus oxyrinchus : Shortfin mako
            Isurus paucus : Longfin mako
            Lamna ditropis : Salmon shark
            Lamna nasus : Porbeagle
            Megachasma pelagios : Megamouth shark
            Mitsukurina owstoni : Goblin shark
            Carcharias taurus : Sand tiger shark
            Odontaspis ferox : Smalltooth sand tiger
            Odontaspis noronhai : Bigeye sand tiger
            Pseudocarcharias kamoharai : Crocodile shark
            Brachaelurus colcloughi : Bluegrey carpetshark
            Brachaelurus waddi : Blind shark
            Ginglymostoma cirratum : Nurse shark
            Nebrius ferrugineus : Tawny nurse shark
            Pseudoginglymostoma brevicaudatum : Short-tail nurse shark
            Chiloscyllium arabicum : Arabian carpetshark
            Chiloscyllium burmensis : Burmese bamboo shark
            Chiloscyllium griseum : Grey bamboo shark
            Chiloscyllium hasselti : Hasselt's bamboo shark
            Chiloscyllium indicum : Slender bamboo shark
            Chiloscyllium plagiosum : Whitespotted bamboo shark
            Chiloscyllium punctatum : Brownbanded bamboo shark
            Hemiscyllium freycineti : Indonesian speckled carpetshark
            Hemiscyllium galei : Cenderwasih epaulette shark
            Hemiscyllium hallstromi : Papuan epaulette shark
            Hemiscyllium halmahera : Halmahera epaulette shark
            Hemiscyllium henryi : Henry's epaulette shark
            Hemiscyllium michaeli : Milne Bay epaulette shark
            Hemiscyllium ocellatum : Epaulette shark
            Hemiscyllium strahani : Hooded carpetshark
            Hemiscyllium trispeculare : Speckled carpetshark
            Eucrossorhinus dasypogon : Tasselled wobbegong
            Orectolobus floridus : Floral banded wobbegong
            Orectolobus halei : Gulf wobbegong
            Orectolobus hutchinsi : Western wobbegong
            Orectolobus japonicus : Japanese wobbegong
            Orectolobus leptolineatus : Indonesian wobbegong
            Orectolobus maculatus : Spotted wobbegong
            Orectolobus ornatus : Ornate wobbegong
            Orectolobus parvimaculatus : Dwarf spotted wobbegong
            Orectolobus reticulatus : Network wobbegong
            Orectolobus wardi : Northern wobbegong
            Sutorectus tentaculatus : Cobbler wobbegong
            Cirrhoscyllium expolitum : Barbelthroat carpetshark
            Cirrhoscyllium formosanum : Taiwan saddled carpetshark
            Cirrhoscyllium japonicum : Saddled carpetshark
            Parascyllium collare : Collared carpetshark
            Parascyllium elongatum : Elongate carpetshark
            Parascyllium ferrugineum : Rusty carpetshark
            Parascyllium sparsimaculatum : Ginger carpetshark
            Parascyllium variolatum : Necklace carpetshark
            Rhincodon typus : Whale shark
            Stegostoma fasciatum : Zebra shark
            Pliotrema warreni : Sixgill sawshark
            Pristiophorus cirratus : Longnose sawshark
            Pristiophorus delicatus : Tropical sawshark
            Pristiophorus japonicus : Japanese sawshark
            Pristiophorus lanae : Lana's sawshark
            Pristiophorus nancyae : African dwarf sawshark
            Pristiophorus nudipinnis : Shortnose sawshark
            Pristiophorus schroederi : Bahamas sawshark
            Centrophorus atromarginatus : Dwarf gulper shark
            Centrophorus granulosus : Gulper shark
            Centrophorus harrissoni : Dumb gulper shark
            Centrophorus isodon : Blackfin gulper shark
            Centrophorus lusitanicus : Lowfin gulper shark
            Centrophorus moluccensis : Smallfin gulper shark
            Centrophorus seychellorum : Seychelles gulper shark
            Centrophorus squamosus : Leafscale gulper shark
            Centrophorus tessellatus : Mosaic gulper shark
            Centrophorus westraliensis : Western gulper shark
            Centrophorus zeehaani : Southern dogfish
            Deania calcea : Birdbeak dogfish
            Deania hystricosa : Rough longnose dogfish
            Deania profundorum : Arrowhead dogfish
            Deania quadrispinosa : Longsnout dogfish
            Dalatias licha : Kitefin shark
            Euprotomicroides zantedeschia : Taillight shark
            Euprotomicrus bispinatus : Pygmy shark
            Heteroscymnoides marleyi : Longnose pygmy shark
            Isistius brasiliensis : Cookiecutter shark
            Isistius plutodus : Largetooth cookiecutter shark
            Mollisquama parini : Pocket shark
            Squaliolus aliae : Smalleye pygmy shark
            Squaliolus laticaudus : Spined pygmy shark
            Aculeola nigra : Hooktooth dogfish
            Centroscyllium excelsum : Highfin dogfish
            Centroscyllium fabricii : Black dogfish
            Centroscyllium granulatum : Granular dogfish
            Centroscyllium kamoharai : Bareskin dogfish
            Centroscyllium nigrum : Combtooth dogfish
            Centroscyllium ornatum : Ornate dogfish
            Centroscyllium ritteri : Whitefin dogfish
            Etmopterus baxteri : New Zealand lanternshark
            Etmopterus bigelowi : Blurred lanternshark
            Etmopterus brachyurus : Short-tail lanternshark
            Etmopterus bullisi : Lined lanternshark
            Etmopterus burgessi : Broad-snout lanternshark
            Etmopterus carteri : Cylindrical lanternshark
            Etmopterus caudistigmus : Tailspot lanternshark
            Etmopterus compagnoi : Brown lanternshark
            Etmopterus decacuspidatus : Combtooth lanternshark
            Etmopterus dianthus : Pink lanternshark
            Etmopterus dislineatus : Lined lanternshark
            Etmopterus evansi : Blackmouth lanternshark
            Etmopterus fusus : Pygmy lanternshark
            Etmopterus gracilispinis : Broadbanded lanternshark
            Etmopterus granulosus : Southern lanternshark
            Etmopterus hillianus : Caribbean lanternshark
            Etmopterus joungi : Shortfin smooth lanternshark
            Etmopterus litvinovi : Smalleye lanternshark
            Etmopterus lucifer : Blackbelly lanternshark
            Etmopterus molleri : Slendertail lanternshark
            Etmopterus perryi : Dwarf lanternshark
            Etmopterus polli : African lanternshark
            Etmopterus princeps : Great lanternshark
            Etmopterus pseudosqualiolus : False lanternshark
            Etmopterus pusillus : Smooth lanternshark
            Etmopterus pycnolepis : Dense-scale lanternshark
            Etmopterus robinsi : West Indian lanternshark
            Etmopterus schultzi : Fringefin lanternshark
            Etmopterus sculptus : Sculpted lanternshark
            Etmopterus sentosus : Thorny lanternshark
            Etmopterus sheikoi : Rasptooth dogfish
            Etmopterus spinax : Velvet belly lanternshark
            Etmopterus splendidus : Splendid lanternshark
            Etmopterus unicolor : Bristled lanternshark
            Etmopterus viator : Blue-eye lanternshark
            Etmopterus villosus : Hawaiian lanternshark
            Etmopterus virens : Green lanternshark
            Trigonognathus kabeyai : Viper dogfish
            Oxynotus bruniensis : Prickly dogfish
            Oxynotus caribbaeus : Caribbean roughshark
            Oxynotus centrina : Angular roughshark
            Oxynotus japonicus : Japanese roughshark
            Oxynotus paradoxus : Sailfin roughshark
            Centroscymnus coelolepis : Portuguese dogfish
            Centroscymnus owstonii : Roughskin dogfish
            Centroselachus crepidater : Longnose velvet dogfish
            Proscymnodon macracanthus : Largespine velvet dogfish
            Proscymnodon plunketi : Plunket shark
            Scymnodalatias albicauda : Whitetail dogfish
            Scymnodalatias garricki : Azores dogfish
            Scymnodalatias oligodon : Sparsetooth dogfish
            Scymnodalatias sherwoodi : Sherwood's dogfish
            Scymnodon ringens : Knifetooth dogfish
            Somniosus antarcticus : Southern sleeper shark
            Somniosus longus : Frog shark
            Somniosus microcephalus : Greenland shark
            Somniosus pacificus : Pacific sleeper shark
            Somniosus rostratus : Little sleeper shark
            Zameus ichiharai : Japanese velvet dogfish
            Zameus squamulosus : Velvet dogfish
            Cirrhigaleus asper : Roughskin spurdog
            Cirrhigaleus australis : Southern mandarin dogfish
            Cirrhigaleus barbifer : Mandarin dogfish
            Squalus acanthias : Spiny dogfish
            Squalus albifrons : Eastern highfin spurdog
            Squalus altipinnis : Western highfin spurdog
            Squalus blainville : Longnose spurdog
            Squalus brevirostris : Japanese shortnose spurdog
            Squalus bucephalus : Bighead spurdog
            Squalus chloroculus : Greeneye spurdog
            Squalus crassispinus : Fatspine spurdog
            Squalus cubensis : Cuban dogfish
            Squalus edmundsi : Edmund's spurdog
            Squalus formosus : Taiwan spurdog
            Squalus grahami : Eastern longnose spurdog
            Squalus griffini : Northern spiny dogfish
            Squalus hemipinnis : Indonesian shortsnout spurdog
            Squalus japonicus : Japanese spurdog
            Squalus lalannei : Seychelles spurdog
            Squalus megalops : Shortnose spurdog
            Squalus melanurus : Blacktailed spurdog
            Squalus mitsukurii : Shortspine spurdog
            Squalus montalbani : Indonesian greeneye spurdog
            Squalus nasutus : Western longnose spurdog
            Squalus notocaudatus : Bartail spurdog
            Squalus rancureli : Cyrano spurdog
            Squalus raoulensis : Kermadec spiny dogfish
            Squalus suckleyi : Spotted spiny dogfish
            Squatina aculeata : Sawback angelshark
            Squatina africana : African angelshark
            Squatina albipunctata : Eastern angelshark
            Squatina argentina : Argentine angelshark
            Squatina armata : Chilean angelshark
            Squatina australis : Australian angelshark
            Squatina caillieti : Philippines angelshark
            Squatina californica : Pacific angelshark
            Squatina dumeril : Sand devil
            Squatina formosa : Taiwan angelshark
            Squatina guggenheim : Angular angelshark
            Squatina heteroptera : Gulf angelshark
            Squatina japonica : Japanese angelshark
            Squatina legnota : Indonesian angelshark
            Squatina mexicana : Mexican angelshark
            Squatina nebulosa : Clouded angelshark
            Squatina occulta : Hidden angelshark
            Squatina oculata : Smoothback angelshark
            Squatina pseudocellata : Western angelshark
            Squatina squatina : Angelshark
            Squatina tergocellata : Ornate angelshark
            Squatina tergocellatoides : Ocellated angelshark"

In [None]:
from pathlib import Path

In [None]:
# Text file that the following cells read and split into "before : after" pairs.
# The path is relative, so it is resolved against the current working directory.
# NOTE(review): it climbs three directories up into a Desktop folder, so it
# presumably only resolves on the author's machine — confirm, and consider
# keeping the file next to the notebook.
file = Path("../../../Desktop/tib.txt")

In [None]:
# Read the whole file and break it into lines (line endings removed).
# The encoding is pinned to UTF-8, as is done when attacks.csv is opened,
# instead of relying on the platform's locale default, so the file is decoded
# the same way on every machine.
lines = file.read_text(encoding="utf8").splitlines()

In [None]:
# Trim leading/trailing whitespace from every line read from the file.
types = list(map(str.strip, lines))

In [None]:
# Split each entry into a [before-colon, after-colon] pair.
# Only the first colon is used as the separator and entries without any colon
# (e.g. blank lines) are skipped, so every element has exactly two items and
# the two-name unpacking in the next cell cannot fail.
types_2 = [entry.split(":", 1) for entry in types if ":" in entry]

In [None]:
# Keep only the part that came after the colon in each pair.
types_3 = [after_colon for _before_colon, after_colon in types_2]

In [None]:
# Normalise the names: upper-case them and trim surrounding whitespace.
types_4 = [
    entry.upper().strip()
    for entry in types_3
]

In [None]:
# Display the upper-cased, stripped names built in the previous cell.
types_4

In [None]:
# Print each normalised name on its own line for a quick visual check.
# The guard keeps the output empty (no stray blank line) for an empty list.
if types_4:
    print(*types_4, sep="\n")

In [None]:
# Kept from the original cell; nothing in this cell fills it.
sharky = []

# Candidate names: de-duplicated in their original order, blanks dropped (an
# empty name is contained in every text and would overwrite every row), and
# sorted longest first so the most specific name wins. For example, with both
# "NURSE SHARK" and "TAWNY NURSE SHARK" as candidates, a text mentioning the
# latter must not be reduced to the former.
species_names = sorted(
    (name for name in dict.fromkeys(types_4) if name),
    key=len,
    reverse=True,
)


def _canonical_species(raw):
    """Return the longest known name contained in ``raw``, else ``raw`` itself.

    The comparison is done on ``str(raw).upper()`` with a plain substring
    test, so characters in a name are never interpreted as regex syntax.
    Values with no match (including missing values) are returned unchanged.
    """
    text = str(raw).upper()
    for name in species_names:
        if name in text:
            return name
    return raw


# Each row is matched once against its ORIGINAL text. The previous loop
# re-read the cell after every overwrite, so the final value depended on the
# order of the names in the list.
sharks["species"] = [_canonical_species(raw) for raw in tqdm(sharks["species"])]

In [None]:
# Display the species column to inspect the result of the name matching.
sharks["species"]

In [None]:
# Scratch check: the text contains "WHIT" but not "WHITE", so this search finds
# nothing and evaluates to None — a leading `\w*` does not make the match fuzzy.
re.search(r"\w*WHITE","HOLA WHIT SHARK")