In [1]:
import numpy as np
import pandas as pd
from pymongo import MongoClient

In [2]:
# alapértelmezett beállításokkal, csupán az adatbázisnév tetszőleges
def get_mongo_database(
    db_name, host="localhost", port=27017, username=None, password=None
):
    """ Elérjük a db_name-mel hivatkozott adatbázist MongoDB-ről, hitelesítő adatokkal vagy azok nélkül """

    if username and password:
        mongo_uri = "mongodb://{}:{}@{}/{}".format(username, password, host, db_name)
        # URI = uniform resource identifier
        conn = MongoClient(mongo_uri)
    else:
        conn = MongoClient(host, port)

    return conn[
        db_name
    ]  # itt már a második féle jelölést használjuk az adatbázis eléréséhez


def mongo_to_dataframe(
    db_name,
    collection,
    query={},
    host="localhost",
    port=27017,
    username=None,
    password=None,
    no_id=True,
):
    """ Létrehoz egy DataFrame-t egy MongoDB collection-ból """

    db = get_mongo_database(db_name, host, port, username, password)

    # megkeresi a collection-ból a lekérdezés szerinti elemeket
    cursor = db[collection].find(query)

    df = pd.DataFrame(list(cursor))

    # amennyiben azt szeretnénk, hogy törölje ki a MongoDB által hozzáadott id-t
    if no_id:
        del df["_id"]

    return df


# alapvetően, a removeexisting kitörli, ha már létezik,
# a skipifexists, átugorja, ha már létezik,
# de a forceupdate esetén vagy hozzáfűzi, vagy törli és újratölti a removeexisting függvényében
def dataframe_to_mongo(
    df,
    db_name,
    collection,
    host="localhost",
    port=27017,
    username=None,
    password=None,
    removeexisting=True,
    skipifexists=False,
    forceupdate=False,
):
    """ Lement egy DataFrame-t egy MongoDB collection-be """
    db = get_mongo_database(db_name, host, port, username, password)

    if (skipifexists and not forceupdate) and (collection in db.list_collection_names()):
        return

    if (collection in db.list_collection_names()) and removeexisting:
        db[collection].drop()

    records = df.to_dict(orient="records")

    db[collection].insert_many(records)

In [3]:
def dataframe_mongo_compatible(df):
    # a MongoDB-n való tárolás miatt vissza kell alakítani object-é az oszlopot, és NaN-ra állítani a NaT-okat,
    # mert másképp nem engedi feltölteni, ValueError miatt
    df.date_of_death = df.date_of_death.astype(object).where(df.date_of_death.notnull(), np.nan)

    return df

def dataframe_reverse_compatible(df):
    # visszalakítjuk datetime objektummá az oszlopot
    df.date_of_death = pd.to_datetime(df.date_of_death)

    return df

In [25]:
# lekérjük a Nobel-adatsorunkat az adatbázisunkból
df = mongo_to_dataframe('nobel_prize', 'winners_cleaned')

# lekérjük az 1996-ban győzött egyéneket
df[df.year == 1996].count() # 14 győztes, 1-el több, mint kellene
df[df.year == 1996] # meg is van, Rolf Zinkernagel kétszer szerepel
# Wikipédia hiba miatt nem törlődött, hiszen nem volt megcsillagozva a neve, jelölve a born_in és country ellentétet

# töröljük a svájci Zinkernagel-t, hiszen inkább mondható Ausztrálnak, mint svájcinak
df = df[~(df.name == 'Rolf M. Zinkernagel')]
df[df.year == 1996].count() # 13 győztes

# df.info() # 926 győztes, még mindig 7-el több, mint kellene

df = df.drop_duplicates(['name', 'year']) # nincsenek tökéletes duplikátumok névbeli elírás lehet

# valami nincs rendben 2000-el sem, hiszen több győztes van, mint kellene, és 2001-el sem
df[df.year == 2000].count() # megfelelő 13 győztes
df[df.year == 2001].count() # 1-el kevesebb, mint kellene, minden rendben, hiszen a 15-ik győztes szervezet, és nem egyén

df[df.gender == 'female'].count() # rendben van, 54 kell legyen
df.describe(include=['object']) # 922 unique név van, ami túl sok, 3-al több, mint kellene

Unnamed: 0,link,name,category,country,text,place_of_birth,place_of_death,gender
count,926,926,926,926,926,926,633,926
unique,922,922,6,59,926,648,334,2
top,http://en.wikipedia.org/wiki/Marie_Curie,Marie Curie,Physiology or Medicine,United States,"Pyotr Kapitsa , Physics, 1978",New York City,Cambridge,male
freq,2,2,221,363,1,34,35,872


In [34]:
# következő taktika arra, hogy megtaláljuk a 3 többlet győztest, hogy kategóriánkként nézzük meg melyikből van több
df[df.category == 'Physics'].count() # 1-el több, mint kellene
df[df.category == 'Chemistry'].count() # 1-el több, mint kellene
df[df.category == 'Physiology or Medicine'].count() # 2-vel több, mint kellene
df[df.category == 'Literature'].count() # 1-el kevesebb, mint kellene
df[df.category == 'Peace'].count() # rendben van
df[df.category == 'Economics'].count() # rendben van

# tehát, megtudtuk azt, hogy fizikából és kémiából 1-el több, orvostanból 2-vel több van, míg irodalomból valaki hiányzik

link              84
name              84
year              84
category          84
country           84
text              84
date_of_birth     84
date_of_death     43
place_of_birth    84
place_of_death    43
gender            84
award_age         84
dtype: int64

In [39]:
df[(df.category == 'Literature') & (df.year == 1974)]

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
537,http://en.wikipedia.org/wiki/Eyvind_Johnson,Eyvind Johnson,1974,Literature,Sweden,"Eyvind Johnson , Literature, 1974",1900-07-29,1976-08-25,Boden,Stockholm,male,74
540,http://en.wikipedia.org/wiki/Harry_Martinson,Harry Martinson,1974,Literature,Sweden,"Harry Martinson , Literature, 1974",1904-05-06,1978-02-11,Blekinge,Stockholm,male,70


In [41]:
df[df.name == 'Olga Tokarczuk']

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
594,http://en.wikipedia.org/wiki/Olga_Tokarczuk,Olga Tokarczuk,2018,Literature,Poland,"Olga Tokarczuk , Literature, 2018",1962-01-29,NaT,Sulechów,,female,56


In [50]:
# következő technika, hogy országok szerint ellenőrízzük le
# amerikai irodalmárokból hiányzik 1
df[(df.category == 'Literature') & (df.country == 'United States')].count()
df[(df.category == 'Literature') & (df.country == 'United States')].sort_values(by=['year'])
# végül kiderült Amerikából sem hiányzik, csak Lengyelországba van bejegyezve a nyertes

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
52,http://en.wikipedia.org/wiki/Sinclair_Lewis,Sinclair Lewis,1930,Literature,United States,"Sinclair Lewis , Literature, 1930",1885-02-07,1951-01-10,Sauk Centre,Rome,male,45
59,http://en.wikipedia.org/wiki/Eugene_O%27Neill,Eugene O'Neill,1936,Literature,United States,"Eugene O'Neill , Literature, 1936",1888-10-16,1953-11-27,New York City,Boston,male,48
58,http://en.wikipedia.org/wiki/Pearl_S._Buck,Pearl S. Buck,1938,Literature,United States,"Pearl S. Buck , Literature, 1938",1892-06-26,1973-03-06,Hillsboro,Danby,female,46
66,http://en.wikipedia.org/wiki/William_Faulkner,William Faulkner,1949,Literature,United States,"William Faulkner , Literature, 1949",1897-09-25,1962-07-06,New Albany,Byhalia,male,52
90,http://en.wikipedia.org/wiki/Ernest_Hemingway,Ernest Hemingway,1954,Literature,United States,"Ernest Hemingway , Literature, 1954",1899-07-21,1961-07-02,Oak Park,Ketchum,male,55
33,http://en.wikipedia.org/wiki/John_Steinbeck,John Steinbeck,1962,Literature,United States,"John Steinbeck , Literature, 1962",1902-02-27,1968-12-20,Salinas,Harlem,male,60
163,http://en.wikipedia.org/wiki/Saul_Bellow,Saul Bellow,1976,Literature,United States,"Saul Bellow , born in Canada , Literature, 1976",1915-06-10,2005-04-05,Lachine,Brookline,male,61
139,http://en.wikipedia.org/wiki/Isaac_Bashevis_Si...,Isaac Bashevis Singer,1978,Literature,United States,"Isaac Bashevis Singer , born in then Russian ...",1902-11-21,1991-07-24,Leoncin,Miami,male,76
205,http://en.wikipedia.org/wiki/Joseph_Brodsky,Joseph Brodsky,1987,Literature,United States,"Joseph Brodsky , born in Russia , Literature,...",1940-05-24,1996-01-28,Saint Petersburg,Brooklyn,male,47
232,http://en.wikipedia.org/wiki/Toni_Morrison,Toni Morrison,1993,Literature,United States,"Toni Morrison , Literature, 1993",1931-02-18,2019-08-05,Lorain,The Bronx,female,62


In [89]:
count = 0

for i in range(0, 10):
    year = 1960 + i
    count = count + int(df[(df.category == 'Literature') & (df.year == year) & (df.gender == 'male')].count().link)

print(count)

# kiderült 1960-1969 között hiányzik egy

9


In [73]:
df[(df.category == 'Literature') & (df.year == 1913)]

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
695,http://en.wikipedia.org/wiki/Rabindranath_Tagore,Rabindranath Tagore,1913,Literature,India,"Rabindranath Tagore , Literature, 1913",1861-05-07,1941-08-07,Kolkata,Kolkata,male,52


In [92]:
df[(df.category == 'Literature') & (df.year >= 1960) & (df.year <= 1969)].sort_values(by=['year'])

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
778,http://en.wikipedia.org/wiki/Saint-John_Perse,Saint-John Perse,1960,Literature,France,"Saint-John Perse , Literature, 1960",1887-05-31,1975-09-20,Pointe-à-Pitre,Hyères,male,73
33,http://en.wikipedia.org/wiki/John_Steinbeck,John Steinbeck,1962,Literature,United States,"John Steinbeck , Literature, 1962",1902-02-27,1968-12-20,Salinas,Harlem,male,60
724,http://en.wikipedia.org/wiki/Giorgos_Seferis,Giorgos Seferis,1963,Literature,Greece,"Giorgos Seferis , Literature, 1963",1900-03-13,1971-09-20,İzmir,Athens,male,63
777,http://en.wikipedia.org/wiki/Jean-Paul_Sartre,Jean-Paul Sartre,1964,Literature,France,"Jean-Paul Sartre , Literature, 1964 (declined ...",1905-06-21,1980-04-15,Paris,14th arrondissement of Paris,male,59
577,http://en.wikipedia.org/wiki/Mikhail_Sholokhov,Mikhail Sholokhov,1965,Literature,Russia and Soviet Union,"Mikhail Sholokhov , Literature, 1965",1905-05-11,1984-02-21,Kruzhilin,Vyoshenskaya,male,60
543,http://en.wikipedia.org/wiki/Nelly_Sachs,Nelly Sachs,1966,Literature,Sweden,"Nelly Sachs , born in Germany , Literature, 1966",1891-12-10,1970-05-12,Berlin-Schöneberg,Stockholm,female,75
669,http://en.wikipedia.org/wiki/Shmuel_Yosef_Agnon,Shmuel Yosef Agnon,1966,Literature,Israel,"Shmuel Yosef Agnon , born in Austria-Hungary...",1888-07-17,1970-02-17,Buchach,Jerusalem,male,78
645,http://en.wikipedia.org/wiki/Miguel_%C3%81ngel...,Miguel Ángel Asturias,1967,Literature,Guatemala,"Miguel Ángel Asturias , Literature, 1967",1899-10-19,1974-06-09,Guatemala City,Madrid,male,68
631,http://en.wikipedia.org/wiki/Yasunari_Kawabata,Yasunari Kawabata,1968,Literature,Japan,"Yasunari Kawabata , Literature, 1968",1899-06-11,1972-04-16,Ōsaka,Zushi,male,69
670,http://en.wikipedia.org/wiki/Samuel_Beckett,Samuel Beckett,1969,Literature,Ireland,"Samuel Beckett , Literature, 1969",1906-04-13,1989-12-22,Dublin,Paris,male,63


In [145]:
df[df.name == u'Ivo Andrić']
# kiderült, hogy a fenti Jugoszláv úriember hiányzik, manuális hozzáadjuk az adatsorunkhoz és ezzel az irodalom rendben van

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age


In [95]:
data_ivo = {'link': u'http://en.wikipedia.org/wiki/Ivo_Andri%C4%87',  
            'name': u'Ivo Andrić', 
            'year': 1961, 
            'category': 'Literature', 
            'text': u'Ivo Andrić*, born in Condominium of Bosnia and Herzegovina, Austria-Hungary (now Bosnia and Herzegovina), Literature, 1961',
            'country': 'Bosnia and Herzegovina',
            'date_of_birth': pd.to_datetime('9 October 1892'),
            'date_of_death': pd.to_datetime('13 March 1975'),
            'place_of_birth': 'Dolac',
            'place_of_death': 'Belgrade',
            'gender': 'male',
            'award_age': 69
           }

df.append(data_ivo, ignore_index=True)

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
0,http://en.wikipedia.org/wiki/Elfriede_Jelinek,Elfriede Jelinek,2004,Literature,Austria,"Elfriede Jelinek , Literature, 2004",1946-10-20,NaT,Mürzzuschlag,,female,58
1,http://en.wikipedia.org/wiki/Peter_C._Doherty,Peter C. Doherty,1996,Physiology or Medicine,Australia,"Peter C. Doherty , Physiology or Medicine, 1996",1940-10-15,NaT,Brisbane,,male,56
2,http://en.wikipedia.org/wiki/Rolf_Zinkernagel,Rolf Zinkernagel,1996,Physiology or Medicine,Australia,"Rolf Zinkernagel , Physiology or Medicine, 1996",1944-01-06,NaT,Riehen,,male,52
3,http://en.wikipedia.org/wiki/Ren%C3%A9_Cassin,René Cassin,1968,Peace,France,"René Cassin , Peace, 1968",1887-02-05,1976-02-20,Bayonne,Istanbul,male,81
4,http://en.wikipedia.org/wiki/Isamu_Akasaki,Isamu Akasaki,2014,Physics,Japan,"Isamu Akasaki , Physics, 2014",1929-01-30,NaT,Chiran,,male,85
...,...,...,...,...,...,...,...,...,...,...,...,...
922,http://en.wikipedia.org/wiki/Luis_Federico_Leloir,Luis Federico Leloir,1970,Chemistry,Argentina,"Luis Federico Leloir , Chemistry, 1970",1906-09-06,1987-12-02,Paris,Catamarca Province,male,64
923,http://en.wikipedia.org/wiki/Bernardo_Houssay,Bernardo Houssay,1947,Physiology or Medicine,Argentina,"Bernardo Houssay , Physiology or Medicine, 1947",1887-04-10,1971-09-21,Buenos Aires,Buenos Aires,male,60
924,http://en.wikipedia.org/wiki/Carlos_Saavedra_L...,Carlos Saavedra Lamas,1936,Peace,Argentina,"Carlos Saavedra Lamas , Peace, 1936",1878-11-01,1959-05-05,Buenos Aires,Buenos Aires,male,58
925,http://en.wikipedia.org/wiki/Brian_Schmidt,Brian Schmidt,2011,Physics,Australia,"Brian Schmidt , born in the United States , P...",1967-02-24,NaT,Missoula,,male,44


In [96]:
# miután rendben van az irodalom, béke és közgáz Nobel-győztesek sora, megkell nézzük, 
# hogy mi történt az orvostan, kémia és fizikával

count = 0

for i in range(1, 120):
    year = 1900 + i
    
    count = count + int(df[(df.category == 'Physiology or Medicine') & (df.year == year)].count().link)
    
    if not year % 10:
        print('{} = {}'.format(year, count))
        
        count = 0

1910 = 13
1920 = 6
1930 = 11
1940 = 13
1950 = 17
1960 = 20
1970 = 26
1980 = 26
1990 = 21
2000 = 21
2010 = 24


In [102]:
df[(df.category == 'Physiology or Medicine') & (df.year >= 1921) & (df.year <= 1930)].sort_values(by=['year'])

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
389,http://en.wikipedia.org/wiki/Archibald_Vivian_...,Archibald Vivian Hill,1922,Physiology or Medicine,United Kingdom,"Archibald Vivian Hill , Physiology or Medicine...",1886-09-26,1977-06-03,Bristol,Cambridge,male,36
690,http://en.wikipedia.org/wiki/Otto_Fritz_Meyerhof,Otto Fritz Meyerhof,1922,Physiology or Medicine,Germany,"Otto Fritz Meyerhof , Physiology or Medicine, ...",1884-04-12,1951-10-06,Hildesheim,Philadelphia,male,38
888,http://en.wikipedia.org/wiki/John_Macleod_(phy...,John James Rickard Macleod,1923,Physiology or Medicine,Canada,"John James Rickard Macleod , born in the Unit...",1876-09-06,1935-03-16,Clunie,Aberdeen,male,47
897,http://en.wikipedia.org/wiki/Frederick_Banting,Frederick G. Banting,1923,Physiology or Medicine,Canada,"Frederick G. Banting , Physiology or Medicine,...",1891-11-14,1941-02-21,Alliston,Musgrave Harbour,male,32
617,http://en.wikipedia.org/wiki/Willem_Einthoven,Willem Einthoven,1924,Physiology or Medicine,Netherlands,"Willem Einthoven , Physiology or Medicine, 1924",1860-05-21,1927-09-29,Semarang,Leiden,male,64
787,http://en.wikipedia.org/wiki/Johannes_Fibiger,Johannes Fibiger,1926,Physiology or Medicine,Denmark,"Johannes Fibiger , Physiology or Medicine, 1926",1867-04-23,1928-01-30,Silkeborg,Copenhagen,male,59
920,http://en.wikipedia.org/wiki/Julius_Wagner-Jau...,Julius Wagner-Jauregg,1927,Physiology or Medicine,Austria,"Julius Wagner-Jauregg , Physiology or Medicine...",1857-03-07,1940-09-27,Wels,Vienna,male,70
821,http://en.wikipedia.org/wiki/Charles_Nicolle,Charles Nicolle,1928,Physiology or Medicine,France,"Charles Nicolle , Physiology or Medicine, 1928",1866-09-21,1936-02-28,Rouen,Tunis,male,62
398,http://en.wikipedia.org/wiki/Frederick_Hopkins,Frederick Hopkins,1929,Physiology or Medicine,United Kingdom,"Frederick Hopkins , Physiology or Medicine, 1929",1861-06-20,1947-05-16,Eastbourne,Cambridge,male,68
619,http://en.wikipedia.org/wiki/Christiaan_Eijkman,Christiaan Eijkman,1929,Physiology or Medicine,Netherlands,"Christiaan Eijkman , Physiology or Medicine, 1929",1858-08-11,1930-11-05,Nijkerk,Utrecht,male,71


In [105]:
for i in range(0, 120):
    year = 1900 + i
    
    if int(df[(df.category == 'Physiology or Medicine') & (df.year == year)].count().link) >= 2:
        print(year, int(df[(df.category == 'Physiology or Medicine') & (df.year == year)].count().link), sep=' ')

1906 2
1908 3
1922 2
1923 2
1929 2
1932 2
1934 3
1936 2
1943 2
1944 2
1945 3
1947 3
1949 2
1950 3
1953 2
1954 3
1956 4
1958 3
1959 2
1960 2
1962 3
1963 3
1964 2
1965 3
1966 2
1967 3
1968 3
1969 3
1970 3
1972 2
1973 3
1974 3
1975 3
1976 3
1977 3
1978 3
1979 2
1980 3
1981 3
1982 3
1984 2
1985 2
1986 2
1988 3
1989 2
1990 2
1991 2
1992 2
1993 2
1994 2
1995 3
1996 2
1998 3
2000 3
2001 3
2002 3
2003 2
2004 2
2005 2
2006 2
2007 3
2008 3
2009 3
2011 3
2012 2
2013 3
2014 3
2015 3
2017 3
2018 2
2019 3


In [108]:
df[(df.category == 'Physiology or Medicine') & (df.year == 1956)]
# megvan a második dupla André Frédéric Cournand
df = df[~(df.name == u'André F. Cournand')]

df[(df.category == 'Physiology or Medicine') & (df.year == 1956)]

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
95,http://en.wikipedia.org/wiki/Dickinson_W._Rich...,Dickinson W. Richards,1956,Physiology or Medicine,United States,"Dickinson W. Richards , Physiology or Medicine...",1895-10-30,1973-02-23,Orange,Litchfield County,male,61
764,http://en.wikipedia.org/wiki/Werner_Forssmann,Werner Forssmann,1956,Physiology or Medicine,Germany,"Werner Forssmann , Physiology or Medicine, 1956",1904-08-29,1979-06-01,Berlin,Schopfheim,male,52
780,http://en.wikipedia.org/wiki/Andr%C3%A9_Fr%C3%...,André Frédéric Cournand,1956,Physiology or Medicine,France,"André Frédéric Cournand , Physiology or Medici...",1895-09-24,1988-02-19,Paris,Great Barrington,male,61


In [109]:
# tehát mostmár megvan oldva minden, kivétel a fizika és kémia
# időközben kiderült, hogy van egy orvostan győztesem, aki valamely Nobel-listán szerepel, de a hivatalos például nem
# de bennehagyom, hiszen ő is nyertes volt 1908-ban

link              925
name              925
year              925
category          925
country           925
text              925
date_of_birth     925
date_of_death     632
place_of_birth    925
place_of_death    632
gender            925
award_age         925
dtype: int64

In [110]:
for i in range(0, 120):
    year = 1900 + i
    
    if int(df[(df.category == 'Physics') & (df.year == year)].count().link) >= 2:
        print(year, int(df[(df.category == 'Physics') & (df.year == year)].count().link), sep=' ')

1902 2
1903 3
1909 2
1915 2
1925 2
1927 2
1933 2
1936 2
1937 2
1951 2
1952 2
1954 2
1955 2
1956 3
1957 2
1958 3
1959 2
1961 2
1963 3
1964 3
1965 3
1970 2
1972 3
1973 3
1974 2
1975 3
1976 2
1977 3
1978 3
1979 3
1980 2
1981 3
1983 2
1984 2
1986 3
1987 2
1988 3
1989 3
1990 3
1993 2
1994 2
1995 2
1996 3
1997 3
1998 3
1999 2
2000 3
2001 3
2002 3
2003 3
2004 3
2005 3
2006 2
2007 2
2008 3
2009 3
2010 2
2011 4
2012 2
2013 2
2014 3
2015 2
2016 3
2017 3
2018 3
2019 3


In [113]:
df[(df.category == 'Physics') & (df.year == 2011)]
# fizika dupla Brian Schmidt

df = df[~(df.name == 'Brian Schmidt')]

df[(df.category == 'Physics') & (df.year == 2011)]

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
337,http://en.wikipedia.org/wiki/Brian_P._Schmidt,Brian P. Schmidt,2011,Physics,United States,"Brian P. Schmidt , Physics, 2011",1967-02-24,NaT,Missoula,,male,44
338,http://en.wikipedia.org/wiki/Saul_Perlmutter,Saul Perlmutter,2011,Physics,United States,"Saul Perlmutter , Physics, 2011",1959-09-22,NaT,Champaign,,male,52
339,http://en.wikipedia.org/wiki/Adam_G._Riess,Adam G. Riess,2011,Physics,United States,"Adam G. Riess , Physics, 2011",1969-12-16,NaT,"Washington, D.C.",,male,42


In [114]:
for i in range(0, 120):
    year = 1900 + i
    
    if int(df[(df.category == 'Chemistry') & (df.year == year)].count().link) >= 2:
        print(year, int(df[(df.category == 'Chemistry') & (df.year == year)].count().link), sep=' ')

1912 2
1929 2
1931 2
1935 2
1937 2
1939 2
1946 3
1950 2
1951 2
1952 2
1956 2
1962 2
1963 2
1967 3
1969 2
1972 3
1973 2
1975 2
1979 2
1980 3
1981 2
1985 2
1986 3
1987 3
1988 3
1989 2
1993 2
1995 3
1996 3
1997 3
1998 2
1999 2
2000 3
2001 3
2002 3
2003 2
2004 3
2005 3
2008 3
2009 3
2010 3
2012 2
2013 3
2014 3
2015 3
2016 3
2017 3
2018 3
2019 3


In [129]:
df[df.duplicated(['year', 'date_of_birth'])]

# megvan a duplikátum, Ahmed Zewail
df[(df.category == 'Chemistry') & (df.year == 1999)]
df = df[~(df.name == 'Ahmed Zewail')]
df[(df.category == 'Chemistry') & (df.year == 1999)]

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
219,http://en.wikipedia.org/wiki/Ahmed_H._Zewail,Ahmed H. Zewail,1999,Chemistry,United States,"Ahmed H. Zewail , born in Egypt , Chemistry, ...",1946-02-26,2016-08-02,Damanhur,Pasadena,male,53


In [130]:
df[df.duplicated(['year', 'date_of_birth'])]

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
200,http://en.wikipedia.org/wiki/Jerome_Karle,Jerome Karle,1985,Chemistry,United States,"Jerome Karle , Chemistry, 1985",1918-06-18,2013-06-06,New York City,Annandale,male,67
582,http://en.wikipedia.org/wiki/%C3%89lie_Metchni...,Élie Metchnikoff,1908,Physiology or Medicine,Russia and Soviet Union,"Élie Metchnikoff , born in now Ukraine , Phys...",1845-05-03,1916-07-02,Ivanivka,Paris,male,63
836,http://en.wikipedia.org/wiki/Yves_Chauvin,Yves Chauvin,2005,Chemistry,France,"Yves Chauvin , Chemistry, 2005",1930-10-10,2015-01-28,Menen,Tours,male,75


In [133]:
df[(df.place_of_birth == 'Ivanivka')]
# kiderült, hogy az az ember, aki 1908-ban győztesnek vettem, valójában két néven is "létezik"
# töröljük az egyiket
df = df[~(df.name == u'Élie Metchnikoff')]
df[(df.place_of_birth == 'Ivanivka')]

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
422,http://en.wikipedia.org/wiki/Ilya_Ilyich_Mechn...,Ilya Ilyich Mechnikov,1908,Physiology or Medicine,Ukraine,"Ilya Ilyich Mechnikov , Physiology or Medicine...",1845-05-03,1916-07-02,Ivanivka,Paris,male,63


In [139]:
df[(df.place_of_death == 'Tours') & (df.year == 2005)]

Unnamed: 0,link,name,year,category,country,text,date_of_birth,date_of_death,place_of_birth,place_of_death,gender,award_age
836,http://en.wikipedia.org/wiki/Yves_Chauvin,Yves Chauvin,2005,Chemistry,France,"Yves Chauvin , Chemistry, 2005",1930-10-10,2015-01-28,Menen,Tours,male,75


In [140]:
df.describe(include=['object'])

Unnamed: 0,link,name,category,country,text,place_of_birth,place_of_death,gender
count,922,922,922,922,922,922,630,922
unique,918,918,6,59,922,648,334,2
top,http://en.wikipedia.org/wiki/Marie_Curie,Marie Curie,Physiology or Medicine,United States,"Pyotr Kapitsa , Physics, 1978",New York City,Cambridge,male
freq,2,2,219,362,1,34,35,868


In [151]:
data_ivo = {'link': u'http://en.wikipedia.org/wiki/Ivo_Andri%C4%87',  
            'name': u'Ivo Andrić', 
            'year': 1961, 
            'category': 'Literature', 
            'text': u'Ivo Andrić*, born in Condominium of Bosnia and Herzegovina, Austria-Hungary (now Bosnia and Herzegovina), Literature, 1961',
            'country': 'Bosnia and Herzegovina',
            'date_of_birth': pd.to_datetime('9 October 1892'),
            'date_of_death': pd.to_datetime('13 March 1975'),
            'place_of_birth': 'Dolac',
            'place_of_death': 'Belgrade',
            'gender': 'male',
            'award_age': 69
           }

df = df.append(data_ivo, ignore_index=True)

In [160]:
df.describe(include=['object'])
# minden korrekt, 919 egyéni győztes, duplákkal együtt 923, bármilyen egyesület nélkül
df.count()

link              923
name              923
year              923
category          923
country           923
text              923
date_of_birth     923
date_of_death     631
place_of_birth    923
place_of_death    631
gender            923
award_age         923
dtype: int64

In [157]:
# mostmár fel is tölthetjük a teljes listát, majd kiegészíthetjük Ivo Andric életrajzi adataival a másikat
df = dataframe_mongo_compatible(df)

dataframe_to_mongo(df, 'nobel_prize', 'winners_cleaned', forceupdate=True)

df = dataframe_reverse_compatible(df)

In [158]:
df_w_bios = mongo_to_dataframe('nobel_prize', 'winners_cleaned_w_bios')

In [177]:
df = df.reset_index()

df_new = df[['link', 'name', 'category', 'country', 'text', 'place_of_birth', 'place_of_death', 'gender', 'year', 'award_age',\
            'date_of_death', 'date_of_birth']].merge(df_w_bios[['name', 'year', 'image_urls', 'mini_bio', 'bio_image']], on=['name', 'year'], how='left')

ivo_bio = '<p><b>Ivo Andrić</b> (<a href="/wiki/Serbian_Cyrillic_alphabet" title="Serbian Cyrillic alphabet">Serbian Cyrillic</a>: <span lang="sr-Cyrl">Иво Андрић</span>, <small>pronounced&nbsp;</small><span title="Representation in the International Phonetic Alphabet (IPA)" class="IPA"><a href="/wiki/Help:IPA/Serbo-Croatian" title="Help:IPA/Serbo-Croatian">[ǐːʋo ǎːndritɕ]</a></span>; born <b>Ivan Andrić</b>; 9 October 1892&nbsp;– 13 March 1975) was a Yugoslav<sup id="cite_ref-5" class="reference"><a href="#cite_note-5">[a]</a></sup> novelist, poet and <a href="/wiki/Short_story" title="Short story">short story</a> writer who won the <a href="/wiki/Nobel_Prize_in_Literature" title="Nobel Prize in Literature">Nobel Prize in Literature</a> in 1961. His writings dealt mainly with life in his native <a href="/wiki/Bosnia_(region)" title="Bosnia (region)">Bosnia</a> under <a href="/wiki/Ottoman_Bosnia_and_Herzegovina" title="Ottoman Bosnia and Herzegovina">Ottoman rule</a>.</p>'\
+ '<p>Born in <a href="/wiki/Travnik" title="Travnik">Travnik</a> in the <a href="/wiki/Austrian_Empire" title="Austrian Empire">Austrian Empire</a>, modern-day <a href="/wiki/Bosnia" class="mw-redirect" title="Bosnia">Bosnia</a>, Andrić attended high school in <a href="/wiki/Sarajevo" title="Sarajevo">Sarajevo</a>, where he became an active member of several <a href="/wiki/South_Slavs" title="South Slavs">South Slav</a> national youth organizations. Following the <a href="/wiki/Assassination_of_Archduke_Franz_Ferdinand_of_Austria" class="mw-redirect" title="Assassination of Archduke Franz Ferdinand of Austria">assassination</a> of Archduke <a href="/wiki/Archduke_Franz_Ferdinand_of_Austria" title="Archduke Franz Ferdinand of Austria">Franz Ferdinand</a> in June 1914, Andrić was arrested and imprisoned by the Austro-Hungarian police, who suspected his involvement in the plot. As the authorities were unable to build a strong case against him, he spent much of the war under <a href="/wiki/House_arrest" title="House arrest">house arrest</a>, only being released following a general amnesty for such cases in July 1917. After the war, he studied South Slavic history and literature at universities in <a href="/wiki/Zagreb" title="Zagreb">Zagreb</a> and <a href="/wiki/Graz" title="Graz">Graz</a>, eventually attaining his <a href="/wiki/Doctor_of_Philosophy" title="Doctor of Philosophy">Ph.D</a>. in Graz in 1924. He worked in the diplomatic service of the <a href="/wiki/Kingdom_of_Yugoslavia" title="Kingdom of Yugoslavia">Kingdom of Yugoslavia</a> from 1920 to 1923 and again from 1924 to 1941. In 1939, he became Yugoslavia\'s ambassador to <a href="/wiki/Nazi_Germany" title="Nazi Germany">Germany</a>, but his tenure ended in April 1941 with the <a href="/wiki/Invasion_of_Yugoslavia" title="Invasion of Yugoslavia">German-led invasion of his country</a>. Shortly after the invasion, Andrić returned to German-occupied <a href="/wiki/Belgrade" title="Belgrade">Belgrade</a>. He lived quietly in a friend\'s apartment for the duration of World War II, in conditions likened by some biographers to house arrest, and wrote some of his most important works, including <i><a href="/wiki/The_Bridge_on_the_Drina" title="The Bridge on the Drina">Na Drini ćuprija</a></i> (<i>The Bridge on the Drina</i>).</p>'\
+ '<p>Following the war, Andrić was named to a number of ceremonial posts in Yugoslavia, which had since come under communist rule. In 1961, the <a href="/wiki/Nobel_Committee" title="Nobel Committee">Nobel Committee</a> awarded him the Nobel Prize in Literature, selecting him over writers such as <a href="/wiki/J._R._R._Tolkien" title="J. R. R. Tolkien">J. R. R. Tolkien</a>, <a href="/wiki/Robert_Frost" title="Robert Frost">Robert Frost</a>, <a href="/wiki/John_Steinbeck" title="John Steinbeck">John Steinbeck</a> and <a href="/wiki/E._M._Forster" title="E. M. Forster">E. M. Forster</a>. The Committee cited "the epic force with which he&nbsp;... traced themes and depicted human destinies drawn from his country\'s history". Afterwards, Andrić\'s works found an international audience and were translated into a number of languages. In subsequent years, he received a number of awards in his native country. Andrić\'s health declined substantially in late 1974 and he died in Belgrade the following March.</p>'\
+ '<p>In the years following Andrić\'s death, the Belgrade apartment where he spent much of World War II was converted into a museum and a nearby street corner was named in his honour. A number of other cities in the former <a href="/wiki/Yugoslavia" title="Yugoslavia">Yugoslavia</a> also have streets bearing his name. In 2012, filmmaker <a href="/wiki/Emir_Kusturica" title="Emir Kusturica">Emir Kusturica</a> began construction of an ethno-town in eastern Bosnia that is <a href="/wiki/Andri%C4%87grad" title="Andrićgrad">named after Andrić</a>. As Yugoslavia\'s only Nobel Prize-winning writer, Andrić was well known and respected in his native country during his lifetime. In Bosnia and Herzegovina, beginning in the 1950s and continuing past the <a href="/wiki/Breakup_of_Yugoslavia" title="Breakup of Yugoslavia">breakup of Yugoslavia</a>, his works have been disparaged by Bosniak literary critics for their supposed anti-Muslim bias. In Croatia, his works were long shunned for nationalist reasons, and even briefly blacklisted following Yugoslavia\'s dissolution, but were rehabilitated by the literary community at the start of the 21st century. He is highly regarded in Serbia for his contributions to <a href="/wiki/Serbian_literature" title="Serbian literature">Serbian literature</a>.</p>'

In [179]:
df_new = df_new.set_index('name')
df_new

Unnamed: 0_level_0,link,category,country,text,place_of_birth,place_of_death,gender,year,award_age,date_of_death,date_of_birth,image_urls,mini_bio,bio_image
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Elfriede Jelinek,http://en.wikipedia.org/wiki/Elfriede_Jelinek,Literature,Austria,"Elfriede Jelinek , Literature, 2004",Mürzzuschlag,,female,2004,58,NaT,1946-10-20,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Elfriede Jelinek</b> (<small>German: </s...,full/2ce212a3d7d17ad40edb43ba51c4d5edabbee352.jpg
Peter C. Doherty,http://en.wikipedia.org/wiki/Peter_C._Doherty,Physiology or Medicine,Australia,"Peter C. Doherty , Physiology or Medicine, 1996",Brisbane,,male,1996,56,NaT,1940-10-15,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Peter Charles Doherty</b>, <span class=""...",full/3e0467061b4cabf7a60f238b9d51a8017c7c5122.jpg
Rolf Zinkernagel,http://en.wikipedia.org/wiki/Rolf_Zinkernagel,Physiology or Medicine,Australia,"Rolf Zinkernagel , Physiology or Medicine, 1996",Riehen,,male,1996,52,NaT,1944-01-06,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Rolf Martin Zinkernagel</b> <span class=...,full/0ddaff54c8cc90cbe3018431ef0a87e3189ed161.jpg
René Cassin,http://en.wikipedia.org/wiki/Ren%C3%A9_Cassin,Peace,France,"René Cassin , Peace, 1968",Bayonne,Istanbul,male,1968,81,1976-02-20,1887-02-05,[https://upload.wikimedia.org/wikipedia/common...,<p><b>René Samuel Cassin</b> (5 October 1887 –...,full/e08694de28d166c0fedff0c856820c25927b2751.jpg
Isamu Akasaki,http://en.wikipedia.org/wiki/Isamu_Akasaki,Physics,Japan,"Isamu Akasaki , Physics, 2014",Chiran,,male,2014,85,NaT,1929-01-30,,"<p><b>Isamu Akasaki</b><span style=""font-weigh...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Adolfo Pérez Esquivel,http://en.wikipedia.org/wiki/Adolfo_P%C3%A9rez...,Peace,Argentina,"Adolfo Pérez Esquivel , Peace, 1980",Buenos Aires,,male,1980,49,NaT,1931-11-26,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Adolfo Pérez Esquivel</b> (born November...,full/fda863143ba4f92d64d3349dff90aabe86305823.jpg
Luis Federico Leloir,http://en.wikipedia.org/wiki/Luis_Federico_Leloir,Chemistry,Argentina,"Luis Federico Leloir , Chemistry, 1970",Paris,Catamarca Province,male,1970,64,1987-12-02,1906-09-06,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Luis Federico Leloir</b> <span class=""no...",full/7f20974a211572c3ba715d69412da998466b32e7.jpg
Bernardo Houssay,http://en.wikipedia.org/wiki/Bernardo_Houssay,Physiology or Medicine,Argentina,"Bernardo Houssay , Physiology or Medicine, 1947",Buenos Aires,Buenos Aires,male,1947,60,1971-09-21,1887-04-10,,"<p><b>Bernardo Alberto Houssay</b> (April 10, ...",
Carlos Saavedra Lamas,http://en.wikipedia.org/wiki/Carlos_Saavedra_L...,Peace,Argentina,"Carlos Saavedra Lamas , Peace, 1936",Buenos Aires,Buenos Aires,male,1936,58,1959-05-05,1878-11-01,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Carlos Saavedra Lamas</b> (November 1, 1...",full/dfe7bd81cfe9c364f1d2ed94625c2e8ecd1530ac.jpg


In [182]:
df_new.loc[u'Ivo Andrić', 'mini_bio'] = ivo_bio
df_new.loc[u'Ivo Andrić', 'mini_bio']

'<p><b>Ivo Andrić</b> (<a href="/wiki/Serbian_Cyrillic_alphabet" title="Serbian Cyrillic alphabet">Serbian Cyrillic</a>: <span lang="sr-Cyrl">Иво Андрић</span>, <small>pronounced&nbsp;</small><span title="Representation in the International Phonetic Alphabet (IPA)" class="IPA"><a href="/wiki/Help:IPA/Serbo-Croatian" title="Help:IPA/Serbo-Croatian">[ǐːʋo ǎːndritɕ]</a></span>; born <b>Ivan Andrić</b>; 9 October 1892&nbsp;– 13 March 1975) was a Yugoslav<sup id="cite_ref-5" class="reference"><a href="#cite_note-5">[a]</a></sup> novelist, poet and <a href="/wiki/Short_story" title="Short story">short story</a> writer who won the <a href="/wiki/Nobel_Prize_in_Literature" title="Nobel Prize in Literature">Nobel Prize in Literature</a> in 1961. His writings dealt mainly with life in his native <a href="/wiki/Bosnia_(region)" title="Bosnia (region)">Bosnia</a> under <a href="/wiki/Ottoman_Bosnia_and_Herzegovina" title="Ottoman Bosnia and Herzegovina">Ottoman rule</a>.</p><p>Born in <a href="/wi

In [183]:
df_new

Unnamed: 0_level_0,link,category,country,text,place_of_birth,place_of_death,gender,year,award_age,date_of_death,date_of_birth,image_urls,mini_bio,bio_image
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Elfriede Jelinek,http://en.wikipedia.org/wiki/Elfriede_Jelinek,Literature,Austria,"Elfriede Jelinek , Literature, 2004",Mürzzuschlag,,female,2004,58,NaT,1946-10-20,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Elfriede Jelinek</b> (<small>German: </s...,full/2ce212a3d7d17ad40edb43ba51c4d5edabbee352.jpg
Peter C. Doherty,http://en.wikipedia.org/wiki/Peter_C._Doherty,Physiology or Medicine,Australia,"Peter C. Doherty , Physiology or Medicine, 1996",Brisbane,,male,1996,56,NaT,1940-10-15,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Peter Charles Doherty</b>, <span class=""...",full/3e0467061b4cabf7a60f238b9d51a8017c7c5122.jpg
Rolf Zinkernagel,http://en.wikipedia.org/wiki/Rolf_Zinkernagel,Physiology or Medicine,Australia,"Rolf Zinkernagel , Physiology or Medicine, 1996",Riehen,,male,1996,52,NaT,1944-01-06,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Rolf Martin Zinkernagel</b> <span class=...,full/0ddaff54c8cc90cbe3018431ef0a87e3189ed161.jpg
René Cassin,http://en.wikipedia.org/wiki/Ren%C3%A9_Cassin,Peace,France,"René Cassin , Peace, 1968",Bayonne,Istanbul,male,1968,81,1976-02-20,1887-02-05,[https://upload.wikimedia.org/wikipedia/common...,<p><b>René Samuel Cassin</b> (5 October 1887 –...,full/e08694de28d166c0fedff0c856820c25927b2751.jpg
Isamu Akasaki,http://en.wikipedia.org/wiki/Isamu_Akasaki,Physics,Japan,"Isamu Akasaki , Physics, 2014",Chiran,,male,2014,85,NaT,1929-01-30,,"<p><b>Isamu Akasaki</b><span style=""font-weigh...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Adolfo Pérez Esquivel,http://en.wikipedia.org/wiki/Adolfo_P%C3%A9rez...,Peace,Argentina,"Adolfo Pérez Esquivel , Peace, 1980",Buenos Aires,,male,1980,49,NaT,1931-11-26,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Adolfo Pérez Esquivel</b> (born November...,full/fda863143ba4f92d64d3349dff90aabe86305823.jpg
Luis Federico Leloir,http://en.wikipedia.org/wiki/Luis_Federico_Leloir,Chemistry,Argentina,"Luis Federico Leloir , Chemistry, 1970",Paris,Catamarca Province,male,1970,64,1987-12-02,1906-09-06,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Luis Federico Leloir</b> <span class=""no...",full/7f20974a211572c3ba715d69412da998466b32e7.jpg
Bernardo Houssay,http://en.wikipedia.org/wiki/Bernardo_Houssay,Physiology or Medicine,Argentina,"Bernardo Houssay , Physiology or Medicine, 1947",Buenos Aires,Buenos Aires,male,1947,60,1971-09-21,1887-04-10,,"<p><b>Bernardo Alberto Houssay</b> (April 10, ...",
Carlos Saavedra Lamas,http://en.wikipedia.org/wiki/Carlos_Saavedra_L...,Peace,Argentina,"Carlos Saavedra Lamas , Peace, 1936",Buenos Aires,Buenos Aires,male,1936,58,1959-05-05,1878-11-01,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Carlos Saavedra Lamas</b> (November 1, 1...",full/dfe7bd81cfe9c364f1d2ed94625c2e8ecd1530ac.jpg


In [185]:
df_new.loc[u'Ivo Andrić', 'bio_image'] = None

In [186]:
df_new

Unnamed: 0_level_0,link,category,country,text,place_of_birth,place_of_death,gender,year,award_age,date_of_death,date_of_birth,image_urls,mini_bio,bio_image
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Elfriede Jelinek,http://en.wikipedia.org/wiki/Elfriede_Jelinek,Literature,Austria,"Elfriede Jelinek , Literature, 2004",Mürzzuschlag,,female,2004,58,NaT,1946-10-20,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Elfriede Jelinek</b> (<small>German: </s...,full/2ce212a3d7d17ad40edb43ba51c4d5edabbee352.jpg
Peter C. Doherty,http://en.wikipedia.org/wiki/Peter_C._Doherty,Physiology or Medicine,Australia,"Peter C. Doherty , Physiology or Medicine, 1996",Brisbane,,male,1996,56,NaT,1940-10-15,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Peter Charles Doherty</b>, <span class=""...",full/3e0467061b4cabf7a60f238b9d51a8017c7c5122.jpg
Rolf Zinkernagel,http://en.wikipedia.org/wiki/Rolf_Zinkernagel,Physiology or Medicine,Australia,"Rolf Zinkernagel , Physiology or Medicine, 1996",Riehen,,male,1996,52,NaT,1944-01-06,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Rolf Martin Zinkernagel</b> <span class=...,full/0ddaff54c8cc90cbe3018431ef0a87e3189ed161.jpg
René Cassin,http://en.wikipedia.org/wiki/Ren%C3%A9_Cassin,Peace,France,"René Cassin , Peace, 1968",Bayonne,Istanbul,male,1968,81,1976-02-20,1887-02-05,[https://upload.wikimedia.org/wikipedia/common...,<p><b>René Samuel Cassin</b> (5 October 1887 –...,full/e08694de28d166c0fedff0c856820c25927b2751.jpg
Isamu Akasaki,http://en.wikipedia.org/wiki/Isamu_Akasaki,Physics,Japan,"Isamu Akasaki , Physics, 2014",Chiran,,male,2014,85,NaT,1929-01-30,,"<p><b>Isamu Akasaki</b><span style=""font-weigh...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Adolfo Pérez Esquivel,http://en.wikipedia.org/wiki/Adolfo_P%C3%A9rez...,Peace,Argentina,"Adolfo Pérez Esquivel , Peace, 1980",Buenos Aires,,male,1980,49,NaT,1931-11-26,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Adolfo Pérez Esquivel</b> (born November...,full/fda863143ba4f92d64d3349dff90aabe86305823.jpg
Luis Federico Leloir,http://en.wikipedia.org/wiki/Luis_Federico_Leloir,Chemistry,Argentina,"Luis Federico Leloir , Chemistry, 1970",Paris,Catamarca Province,male,1970,64,1987-12-02,1906-09-06,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Luis Federico Leloir</b> <span class=""no...",full/7f20974a211572c3ba715d69412da998466b32e7.jpg
Bernardo Houssay,http://en.wikipedia.org/wiki/Bernardo_Houssay,Physiology or Medicine,Argentina,"Bernardo Houssay , Physiology or Medicine, 1947",Buenos Aires,Buenos Aires,male,1947,60,1971-09-21,1887-04-10,,"<p><b>Bernardo Alberto Houssay</b> (April 10, ...",
Carlos Saavedra Lamas,http://en.wikipedia.org/wiki/Carlos_Saavedra_L...,Peace,Argentina,"Carlos Saavedra Lamas , Peace, 1936",Buenos Aires,Buenos Aires,male,1936,58,1959-05-05,1878-11-01,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Carlos Saavedra Lamas</b> (November 1, 1...",full/dfe7bd81cfe9c364f1d2ed94625c2e8ecd1530ac.jpg


In [187]:
df_new = df_new.reset_index()
df_new

Unnamed: 0,name,link,category,country,text,place_of_birth,place_of_death,gender,year,award_age,date_of_death,date_of_birth,image_urls,mini_bio,bio_image
0,Elfriede Jelinek,http://en.wikipedia.org/wiki/Elfriede_Jelinek,Literature,Austria,"Elfriede Jelinek , Literature, 2004",Mürzzuschlag,,female,2004,58,NaT,1946-10-20,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Elfriede Jelinek</b> (<small>German: </s...,full/2ce212a3d7d17ad40edb43ba51c4d5edabbee352.jpg
1,Peter C. Doherty,http://en.wikipedia.org/wiki/Peter_C._Doherty,Physiology or Medicine,Australia,"Peter C. Doherty , Physiology or Medicine, 1996",Brisbane,,male,1996,56,NaT,1940-10-15,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Peter Charles Doherty</b>, <span class=""...",full/3e0467061b4cabf7a60f238b9d51a8017c7c5122.jpg
2,Rolf Zinkernagel,http://en.wikipedia.org/wiki/Rolf_Zinkernagel,Physiology or Medicine,Australia,"Rolf Zinkernagel , Physiology or Medicine, 1996",Riehen,,male,1996,52,NaT,1944-01-06,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Rolf Martin Zinkernagel</b> <span class=...,full/0ddaff54c8cc90cbe3018431ef0a87e3189ed161.jpg
3,René Cassin,http://en.wikipedia.org/wiki/Ren%C3%A9_Cassin,Peace,France,"René Cassin , Peace, 1968",Bayonne,Istanbul,male,1968,81,1976-02-20,1887-02-05,[https://upload.wikimedia.org/wikipedia/common...,<p><b>René Samuel Cassin</b> (5 October 1887 –...,full/e08694de28d166c0fedff0c856820c25927b2751.jpg
4,Isamu Akasaki,http://en.wikipedia.org/wiki/Isamu_Akasaki,Physics,Japan,"Isamu Akasaki , Physics, 2014",Chiran,,male,2014,85,NaT,1929-01-30,,"<p><b>Isamu Akasaki</b><span style=""font-weigh...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
918,Adolfo Pérez Esquivel,http://en.wikipedia.org/wiki/Adolfo_P%C3%A9rez...,Peace,Argentina,"Adolfo Pérez Esquivel , Peace, 1980",Buenos Aires,,male,1980,49,NaT,1931-11-26,[https://upload.wikimedia.org/wikipedia/common...,<p><b>Adolfo Pérez Esquivel</b> (born November...,full/fda863143ba4f92d64d3349dff90aabe86305823.jpg
919,Luis Federico Leloir,http://en.wikipedia.org/wiki/Luis_Federico_Leloir,Chemistry,Argentina,"Luis Federico Leloir , Chemistry, 1970",Paris,Catamarca Province,male,1970,64,1987-12-02,1906-09-06,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Luis Federico Leloir</b> <span class=""no...",full/7f20974a211572c3ba715d69412da998466b32e7.jpg
920,Bernardo Houssay,http://en.wikipedia.org/wiki/Bernardo_Houssay,Physiology or Medicine,Argentina,"Bernardo Houssay , Physiology or Medicine, 1947",Buenos Aires,Buenos Aires,male,1947,60,1971-09-21,1887-04-10,,"<p><b>Bernardo Alberto Houssay</b> (April 10, ...",
921,Carlos Saavedra Lamas,http://en.wikipedia.org/wiki/Carlos_Saavedra_L...,Peace,Argentina,"Carlos Saavedra Lamas , Peace, 1936",Buenos Aires,Buenos Aires,male,1936,58,1959-05-05,1878-11-01,[https://upload.wikimedia.org/wikipedia/common...,"<p><b>Carlos Saavedra Lamas</b> (November 1, 1...",full/dfe7bd81cfe9c364f1d2ed94625c2e8ecd1530ac.jpg


In [188]:
# kész, fellehet tölteni a véglegest, és utána leellenőrízni a vizualizációt
df_new = dataframe_mongo_compatible(df_new)

dataframe_to_mongo(df_new, 'nobel_prize', 'winners_cleaned_w_bios', forceupdate=True)

df_new = dataframe_reverse_compatible(df_new)