In [164]:
import pandas as pd

In [165]:
def clean_title_fun(title):
    ''' Function that unifies book names'''
    if title is None or pd.isna(title):
        return None  

    return (str(title).lower()
                   .replace('´', "'")
                   .replace('?', '')
                   .replace('!', '')
                   .replace(',', '')
                   .replace('&', 'and')
                   .replace('’', "'"))


In [166]:
# Load tables
prehled_vse = pd.read_excel('excel tables/Přehled vše2_upraveno.xlsx')  
albatrosmedia = pd.read_excel('excel tables/Scraped Data AlbatrosMedia Series.xlsx')
prehled_vse['Sold To'] = prehled_vse['Sold To'].apply(lambda x: None if 'Not found' in str(x) else x)
albatrosmedia['Sold To'] = albatrosmedia['Sold To'].apply(lambda x: None if 'Not found' in x else x)

# Create column cleaned_titles - lower case, unified  
prehled_vse['cleaned_titles'] =  prehled_vse['Název knihy'].apply(clean_title_fun)

# Take URL as index
url_prehled = pd.Index(prehled_vse['URL'])
url_albatrosmedia = pd.Index(albatrosmedia['URL'])

# Find indices that are not in 'prehled vse'
missing_url = url_albatrosmedia.difference(url_prehled)
common_url = url_albatrosmedia.intersection(url_prehled)

# From Albatros rows only with the missing urls
albatros_missing = albatrosmedia[albatrosmedia['URL'].isin(missing_url)]

#### How many duplicities 

In [167]:
albatros_missing['Book Name'].value_counts()


Book Name
Seasons                                                      2
A Big Book of Poison                                         1
Rainbow Unicorns                                             1
Observing the Plants of the Forest with Hansel and Gretel    1
On the Go                                                    1
                                                            ..
Everything You’ve Ever Wanted to Know About The Moon         1
Fashion HiStory                                              1
Field Buddies                                                1
Forests of the World                                         1
Why Won’t You Flower?                                        1
Name: count, Length: 143, dtype: int64

### Replace Book Names with the ones that are in CRM

In [168]:
# Replace Book Names
# Format (old_book_name, illustrator): new_book_name
fix = {('Anthill', 'Tomáš Tůma'):'Anthill (How Animals Live)', ('Arnie’s Construction', 'Martin Sodomka'): 'Arnie & Construction', 
      ('Arnie’s Workshop', 'Martin Sodomka'):'Arnie & Workshop', ('Beehive', 'Tomáš Tůma'):'Beehive (How Animals Live)', ('Childhood of Famous People', 'Tomáš Tůma'): 'Childhood of Famous People: from Mozart to Einstein',
      ('Christmas all around the World', 'Ocean Hughes'):'Christmas (Reusable Sticker Book)', ('Christmas around the World', 'Mária Nerádová'): 'Christmas around the World (Children Around the World)',
      ('Don´t Be Afraid', 'Jakub Kaše;Lukáš Urbánek'): 'Don\'t Be Afraid', ('Everything You’ve Ever Wanted to Know About The Moon', 'Inna Chernyak'):'Everything You\'ve Ever Wanted to Know About The Moon', 
      ( 'History of Warriors','Kateřina Wagnerová Hikade;Tomáš Pernický;Ondřej Dolejší'):'Warriors', ('Numbers', 'Marie Urbánková'):'Numbers (Got It)',
      ('Numbers', 'Julie Cossette'): 'Numbers (Learning Wheel)', ('Shapes', 'Veronika Kopečková'): 'Shapes (Choose and Learn)',
      ('The Stories of Interesting Towns and Cities', 'Jakub Cenkl'): 'The Stories of Towns and Cities', ('Urban Legends: „True“ Horror Stories from City Streets', 'Miloš Mazal'):'Urban Legends: True Horror Stories from City Streets',
      ('What’s Your Craft?', 'Milan Starý'): 'What\'s Your Craft?', ('What’s Your Job?', 'Milan Starý'):'What\'s Your Job?',
      ('Why Paintings Don´t Need Titles', 'Jiří Franta'):'Why Paintings Don\'t Need Titles', ('Why Won’t You Flower?', 'Katarína Macurová'): 'Why Won\'t You Flower?',
      ('Seasons', 'Julie Cossette'):'Seasons (Learning Wheel)', ('Seasons', 'Veronika Kopečková'): 'Seasons (Choose and Learn)', 
      ('Town', 'Alexandra Májová Hetmerová'): 'Town (Yesterday and Today)'}
for (orig_book_name, ill), new_book_name  in fix.items():
    albatros_missing.loc[(albatros_missing['Book Name'] == orig_book_name) & (albatros_missing['Illustrators Scraped'] == ill), 'Book Name'] = new_book_name

albatros_missing['cleaned_titles'] =  albatros_missing['Book Name'].apply(clean_title_fun)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  albatros_missing['cleaned_titles'] =  albatros_missing['Book Name'].apply(clean_title_fun)


#### Match missing Titles with Prehled vse

In [169]:
# Take Book Name as Index
title_prehled = pd.Index(prehled_vse['cleaned_titles'])
book_albatrosmedia = pd.Index(albatros_missing['cleaned_titles'])

not_found_titles = book_albatrosmedia.difference(title_prehled)
found_titles = book_albatrosmedia.intersection(title_prehled)

# Titles that are not in 'prehled vse'
print(not_found_titles)
print(len(not_found_titles))

Index(['a day of colours', 'alliance of the brave', 'amazing domestic animals',
       'an atlas of incredible islands: real stories of unreal lands',
       'an explanatory dictionary', 'atlas of planet earth for children',
       'characters in literature and their stories', 'frostina',
       'get ready for christmas', 'get ready for halloween', 'girl power',
       'hope it turns out well', 'magic world of unicorns',
       'mandalas diamond art', 'mum and dad have split up',
       'mum dad i want to be president', 'on the go',
       'one ball two mice and what came next', 'parrots are great but…',
       'rabbits are great', 'rainbow unicorns',
       'rob memory doesn't want to leave',
       'scatterbrain the bogey doesn't want to leave',
       'shush do you hear the silence', 'smart scribbles', 'superstitions',
       'the book of disappearing plants', 'the life of a butterfly', 'the sky',
       'the snipkins', 'the year in colours', 'we're only human',
       'what goes on

#### Tuple (Scraped Book Title, Illustrator) -> Book Title in CMR table

In [170]:
# Add additional info to excel table prehled vse
for found_title in found_titles: 
    print(albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'URL'].values[0])
    print(prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'URL'])
    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'URL'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'URL'].values[0]
   
    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Illustrators Scraped'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Illustrators Scraped'].values[0]
    
    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Age'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Age'].values[0]
    
    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Book Parametes'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Book Parametes'].values[0]
    
    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Sold To'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Sold To'].values[0]
    
    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Description'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Description'].values[0]
    
    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Authors Scraped'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Authors Scraped'].values[0]
    
    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Scraping'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Scraping'].values[0]
    
    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Book Name Scraping Original'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Book Name Scraping Original'].values[0]

    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Scraping'] = True

    prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Category'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Category'].values[0]

                                                                          
    
    # for (columnName, columnData) in albatros_missing[(albatros_missing['cleaned_titles']  == found_title)].items():
    #     print(f'{str(columnName)}: {str(columnData.values[0])}')
    #     prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, str(columnName)] = str(columnData.values[0])

https://www.albatrosmedia.eu/book/a-big-book-of-poison/
14    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/a-big-book-of-snow-and-ice/
15    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/a-big-book-of-the-dark/
16    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/a-big-book-of-underground/
17    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/a-bug-hotel/
22    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/a-curious-dragon-and-the-seasons/
25    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/a-world-atlas-of-ghosts-and-friends/
35    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/a-year-in-the-garden/
36    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/a-year-in-the-woods/
37    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/adventure-geometry-the-cube/
40    NaN
Name: URL, dtype: object
https://www.albatrosmedia.eu/book/adventure-geometry-

  prehled_vse.loc[prehled_vse['cleaned_titles'] == found_title, 'Scraping'] = albatros_missing.loc[albatros_missing['cleaned_titles'] == found_title, 'Scraping'].values[0]


In [171]:
albatros_missing[albatros_missing['cleaned_titles'].isin(not_found_titles)]

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,URL,Illustrators Scraped,Age,Book Parametes,Sold To,Description,Book Name,Authors Scraped,Scraping,Book Name Scraping Original,Category,Scraped Series,cleaned_titles
10,10,12,https://www.albatrosmedia.eu/book/a-day-of-col...,Veronika Dvořáková,3-5,"Size 200 x 200 mm, 7 spreads, boardbook",,"Learn and talk\nThere are so many colours, mak...",A Day of Colours,Magda N. Garguláková,True,A Day of Colours,,Learn and talk,a day of colours
34,34,40,https://www.albatrosmedia.eu/book/alliace-of-t...,Jiří Franta,9-12,"Size 215 × 280 mm, 68 pages, hardcover",,Seventh-grader Thomas Malcolm is a hero in his...,Alliance of the Brave,Ester Stará,True,Alliance of the Brave,,,alliance of the brave
36,36,42,https://www.albatrosmedia.eu/book/amazing-dome...,Radka Martincová,6-8,"Size 215 x 280 mm, 64 pages, hardcover",,"Did you know that sows sing lullabies, bees da...",Amazing Domestic Animals,Ester Dobiášová,True,Amazing Domestic Animals,,,amazing domestic animals
41,41,47,https://www.albatrosmedia.eu/book/an-atlas-of-...,Adam Wolf,9-12,"Size 240 x 320 mm, 64 pages, hardcover",,Some islands appear on ancient maps but vanish...,An Atlas of Incredible Islands: Real stories o...,Radek Malý,True,An Atlas of Incredible Islands: Real stories o...,,,an atlas of incredible islands: real stories o...
42,42,49,https://www.albatrosmedia.eu/book/an-explanato...,Michaela Casková,3-5,"Size 233 × 187 mm, 96 pages, hardcover",,"What do words like home, snow, or teddy bear r...",An Explanatory Dictionary,Olga Stehlíková,True,An Explanatory Dictionary,,,an explanatory dictionary
80,80,86,https://www.albatrosmedia.eu/book/atlas-of-pla...,Tomáš Tůma,9-12,"Size 230 × 260 mm, widespreaded 460 × 520 mm, ...",,"An Exploration of Earth’s Wonders, From Its Co...",Atlas of Planet Earth for children,Oldřich Růžička,True,Atlas of Planet Earth for children,,Atlases for Children,atlas of planet earth for children
110,110,114,https://www.albatrosmedia.eu/book/characters-i...,Jakub Cenkl,6-8,"Size 215 × 280 mm, 64 pages, hardcover",,"Deep in the forest, next to the tallest spruce...",Characters in Literature and Their Stories,Štěpánka Sekaninová,True,Characters in Literature and Their Stories,,Remarkable Works of Art around the World,characters in literature and their stories
178,178,180,https://www.albatrosmedia.eu/book/frostina/,Zuzana Čupová,6-8,"Size 165 × 235 mm, 56 pages, hardcover",,"A tiny frost fairy named Frostina, living righ...",Frostina,Kateřina Čupová,True,Frostina,,,frostina
186,186,188,https://www.albatrosmedia.eu/book/get-ready-to...,Ocean Hughes,3-5,"Size 240 x 215 mm, 16 pages + blister with com...",,"Safe, dermatologically tested, and compliant w...",Get Ready for Christmas,Joli Hannah,True,Get Ready for Christmas,,Magic Tattoo Sticker Book,get ready for christmas
187,187,189,https://www.albatrosmedia.eu/book/get-ready-to...,Anuki Lopéz,3-5,"Size 240 x 215 mm, 16 pages + blister with com...",,Get Ready for Halloween: Glow in the Dark Tatt...,Get Ready for Halloween,Joli Hannah,True,Get Ready for Halloween,,Magic Tattoo Sticker Book,get ready for halloween


In [172]:
# Filter rows where cleaned titles are in not_found_titles
filtered = albatros_missing[albatros_missing['cleaned_titles'].isin(not_found_titles)]

# Concatenate with prehled_vse instead of append
prehled_vse = pd.concat([prehled_vse, filtered], ignore_index=True)

### Append scraped series and shuffle order of colums so that Scraped series are next to scraped stuff

In [173]:
prehled_vse['Scraped Series'] = prehled_vse['URL'].apply(lambda x: albatrosmedia[albatrosmedia['URL'] == x] ['Scraped Series'].values[0] if x in list(albatrosmedia['URL']) else None)
cols = list(prehled_vse.columns)
cols.insert(30, cols.pop(cols.index('Scraped Series'))) ## Shuffle order of columns 
prehled_vse = prehled_vse[cols]

#### Second round of unifying book names

In [174]:
fix_2 = {('Animal Adaptations: Extreme Conditions', 'Lida Larina'): 'Why Walruses Thrive On Ice and How Other Animals Survive the Heat or Darkness',
 ('Animal Adaptations: Unique Body Parts', 'Lida Larina'): 'Why the Anteater’s Tongue Is So Long and Other Ways Animals Are Equipped For Life',
 ('Bella and the Lost Ball', 'Mag Takac'):  'Betty and the Lost Ball',
 ('Bustling Cities of the World', 'Magdalena Konečná'): 'Colourful Cities', 
 ('Famous Finds and Finders: Searching for the Past', 'Adam Wolf'): 'Searching for the Past', 
 ('From Pyramids to Palaces: Architecture around the World', 'Marie Kraus'):'Forms of Architecture', # Grafika na obalu je trochu jiná, ale jinak stejná kniha 
 ('How Not to Kill Your Plant', 'Hannah Abbo'): 'I Have a House Plant Too', 
 ('Insectopia: The Wonderful World of Insects', 'Pavla Dvorská;Pavel Dvorský'): 'The Wonderful World of Insects', 
 ('Our Marvelous Memory', 'Dita Vopřadová'): 'Memory', 
 ('Parts of a Whole', 'Federico Bonifacini'): 'What Makes Things Things', 
 ('Planning Grandma’s Party: Measurements, Fractions, and Fun', 'Xiana Teimoy'): 'Grandma’s Amazing Celebration', 
 ('Our Camping Trip: Physics, Chemistry, and Fun', 'Xiana Teimoy'): 'Our Amazing Holiday',
 ('Say What? How We Communicate', 'Charlotte Molas'):'What? A Book About Communication and Understanding',
 ('Shapescapes', 'Lukáš Fibrich'):'What´s Happening in the Circle and in Other Shapes?',
 ('Snake Milker and Other Animal Jobs', 'Jakub Cenkl'): 'Occupations with Animals', 
 ('The Big Book of Jobs', 'Elena Pokaleva'): 'The Wonderful World of Jobs', # Diskutabilní, Obal je jiný, ale kniha pravděpodobně jinak stejná
 ('Tug Your Earlobe: A Picture Guide to Body Language', 'Adam Wolf'): 'Body Language and How to Read It', 
 ('Upside Down', 'Katarína Macurová'): 'Why Won’t You Flower?',
 ('What Goes Inside?', 'Federico Bonifacini'): 'What Fits Inside',
 ('What Plant Is This?', 'Carmen Saldaña'): 'What Is Growing Here?', 
 ('What should I wear now?', 'Alexandra Májová Hetmerová'):'Different Occasions', 
 ('What Should I Wear to Work?', 'Alexandra Májová Hetmerová'): 'Occupations',
 ('What Things Come From Nature?', 'Anastasiia Moshina'): 'What Does Nature Give Us?',
 ('What To Do If You Meet a Bear? A Nature Survival Guide', 'Adam Wolf'):'Animal Encounters',
 ('Where To? A Trip Around the Neighborhood', 'Tomáš Kopecký'): 'I Will Smartly Find My Way', 
 ('Words About Where: Let’s Learn Prepositions', 'Marie Urbankova'): 'In, On or Above? Let´s Learn Prepositions', 
 ('World Record Animals', 'Tomáš Pernický'): 'Records of the Animal World',
 ('Jak se stát akčním hrdinou', 'Martin Šojdr; Tomáš Pernický'): 'I Want to Be an Action Hero', 
 ('Kudy teče řeka', 'Mária Nerádová'): 'Mária Nerádová',
 ('Jak se stát hvězdou', 'Kateřina Hikadová; Ondřej Dolejší; Tomáš Pernický; Martina Lišková'): 'I Want to Be a Star', 
 ('Jak se stát mistrem vypravěčem', 'Kateřina Hikadová; Ondřej Dolejší; Tomáš Pernický; Tomáš Kučerovský'): 'I Want to Be a Storyteller',
 ('Kapka Kája', 'Edit Sliacka'): 'Rosie the Raindrop', 
 ('Čáp Čeněk', 'Edit Sliacka'):'Samie the Stork',
 ('Mraveniště', 'Tomáš Tůma'): 'Anthill (How Animals Live)', 
 ('Vyber si svou rostlinku... a nauč se o ni starat', 'Aneta Žabková'): 'Choose a Plant… and Learn to Care for It', 
 ('Vyber si svého mazlíčka... a nauč se o něj starat', 'Aneta Žabková'): 'Choose a Pet… and Learn to Care for It'}

fix_3 = {('Sevenspot’s Adventures for Little Readers', 'Vojtěch Kubašta'): 'The Adventures of Sevenspot for Little Readers',
         ('Animals', 'Marie Urbánková'): 'Animals (Got It)',
         ('Colours', 'Marie Urbánková'): 'Colours (Got It)'}

for key, value in fix_2.items():
    title = key[0]
    illustrator = key[1]
    prehled_vse.loc[(prehled_vse['Book Name AlbatrosBooks'] == title) & (prehled_vse['Illustrator NK'] == illustrator)]['Název knihy'] = value
    prehled_vse.loc[(prehled_vse['Book Name AlbatrosBook Original'] == title) & (prehled_vse['Illustrators Scraped AlbatrosBook'] == illustrator), 'Název knihy'] = value

for key, value in fix_3.items():
    title = key[0]
    illustrator = key[1]
    prehled_vse.loc[(prehled_vse['Book Name'] == title) & (prehled_vse['Illustrators Scraped'] == illustrator), 'Název knihy'] = value
    print(prehled_vse.loc[(prehled_vse['Illustrators Scraped'] == illustrator), 'Název knihy'])

prehled_vse['cleaned_titles'] =  prehled_vse['Název knihy'].apply(clean_title_fun)

# Separate rows with missing 'Název knihy'
df_missing = prehled_vse[prehled_vse['cleaned_titles'].isnull()]


29                         A House Full of Little Wheels
31                        A Little House Full of Friends
47                      Alice’s Adventures in Wonderland
86                               Animals and the Robbers
87                            Animals and the Sports Day
96                                     Animals on Safari
156                                    Brave Tin Soldier
177                                           Cinderella
221                    Dozen Stories for the Little Ones
255                               Fisherman and his Wife
279                                          Golden Fish
290                   Hansel and Gretel (Vintage Pop-Up)
291      Hansel and Gretel (Vojtech Kubasta Fairy Tales)
403                               Jack and the Beanstalk
425                                Let the Table Be Set!
466                               Little Red Riding Hood
469              Little Red Riding Hood (Vintage Pop-Up)
546                            

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prehled_vse.loc[(prehled_vse['Book Name AlbatrosBooks'] == title) & (prehled_vse['Illustrator NK'] == illustrator)]['Název knihy'] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prehled_vse.loc[(prehled_vse['Book Name AlbatrosBooks'] == title) & (prehled_vse['Illustrator NK'] == illustrator)]['Název knihy'] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pand

In [175]:
# 2. Group the non-missing ones
def combine_values(series):
    return '$ '.join(sorted(set(str(v) for v in series.dropna() if str(v).strip())))

df_grouped = prehled_vse[prehled_vse['cleaned_titles'].notna()].groupby('cleaned_titles').agg(combine_values).reset_index()


# 3. Combine them back
prehled_vse = pd.concat([df_grouped, df_missing], ignore_index=True)

# Replace empty strings and strings with only whitespace with None
prehled_vse = prehled_vse.replace(r'^\s*$', None, regex=True)

  prehled_vse = prehled_vse.replace(r'^\s*$', None, regex=True)


In [176]:
if pd.notnull(prehled_vse[prehled_vse['Book Name NK Original'] == 'Versatile vertebrates'].Scraping.values[0]): print('Not NaN')
else: print('NaN')

Not NaN


#### Change ones to True

In [177]:
def is_valid(value):
    if pd.isnull(value):  # catches None and NaN
        return False
    if isinstance(value, str) and value.strip() != '':
        return True
    return False

def age_size(row):
    if pd.notnull(row.get('je věk a rozměr'))  : return True
    if is_valid(row.get('Age')) and is_valid(row.get('Book Parametes')):  
        return True
    return None

def languages(row):
    print(row)
    if pd.notnull(row.get('jsou jazyky')) : return True
    if is_valid(row.get('Sold To')) : return True #and row['Sold To'] != 'Not found'
    if is_valid(row.get('Languages')) : return True 
    return None

def is_all(row):
    if all(pd.notnull(row[['je PDF', 'je věk a rozměr', 'jsou jazyky']])) : return True


prehled_vse.Scraping = prehled_vse.Scraping.apply(lambda x: True if pd.notnull(x)  else None)
prehled_vse.Albatros = prehled_vse.Albatros.apply(lambda x: True if pd.notnull(x)  else None) 
prehled_vse['B4U NK'] = prehled_vse['B4U NK'].apply(lambda x: True if pd.notnull(x)  else None)
prehled_vse['Sold Licence'] = prehled_vse['Sold Licence'].apply(lambda x: True if pd.notnull(x)  else None) 
prehled_vse['je PDF'] = prehled_vse['je PDF'].apply(lambda x: True if pd.notnull(x) else None) 
prehled_vse['je vše'] = prehled_vse['je vše'].apply(lambda x: True if pd.notnull(x) else None) 
prehled_vse['Scraping AlbatrosBook'] = prehled_vse['Scraping AlbatrosBook'].apply(lambda x: True if pd.notnull(x) else None) 

### Unify - add True where it belongs

In [178]:
prehled_vse['je věk a rozměr'] = prehled_vse[['Age', 'Book Parametes', 'je věk a rozměr']].apply(lambda x: age_size(x), axis = 1)
prehled_vse['jsou jazyky'] = prehled_vse[['jsou jazyky', 'Sold To', 'Languages']].apply(lambda x: languages(x), axis = 1)
prehled_vse['je vše'] = prehled_vse[['je PDF', 'je věk a rozměr', 'jsou jazyky']].apply(lambda x: is_all(x), axis = 1)
prehled_vse['Unnamed: 1'] = prehled_vse[['Unnamed: 0', 'Unnamed: 1']].apply(lambda x: 'alternativní názvy' if '$' in str(x['Unnamed: 0']) else x['Unnamed: 1'], axis=1)
prehled_vse['Název knihy'] = prehled_vse['Název knihy'].apply(lambda x: x.partition('$')[0] if '$' in str(x) else x)
prehled_vse['Book Name'] = prehled_vse['Book Name'].apply(lambda x: x.partition('$')[0] if '$' in str(x) else x)    

jsou jazyky                        1.0
Sold To                           None
Languages      ['Hungarian', 'Polish']
Name: 0, dtype: object
jsou jazyky           1.0
Sold To              None
Languages      ['Polish']
Name: 1, dtype: object
jsou jazyky           1.0
Sold To              None
Languages      ['Polish']
Name: 2, dtype: object
jsou jazyky           1.0
Sold To              None
Languages      ['Polish']
Name: 3, dtype: object
jsou jazyky                                                  1.0
Sold To                                                     None
Languages      ['Slovene', 'Hungarian', 'Polish', 'Bulgarian'...
Name: 4, dtype: object
jsou jazyky           1.0
Sold To              None
Languages      ['Polish']
Name: 5, dtype: object
jsou jazyky                                               1.0
Sold To                                                  None
Languages      ['Slovene', 'Hungarian', 'Polish', 'Romanian']
Name: 6, dtype: object
jsou jazyky                  

In [179]:
prehled_vse.to_excel('excel tables/Prehled_vse_extra.xlsx',index=False)