In [1]:
# takes ~20sec
use Dan :ALL;

viz. https://en.wikipedia.org/w/index.php?title=Filmfare_Award_for_Best_Actress&action=edit

In [2]:
# Load the input file into memory, one array element per line.
my @lines = 'FilmFare.txt'.IO.lines;

# Preview the first 21 lines. .head stops at the end of the array, so a file
# shorter than 21 lines prints only what it has instead of "(Any)" fillers.
.say for @lines.head(21);

===1950s===

{| class="wikitable sortable" rowspan=2 style="text-align: left;" border="2" cellpadding="5"
|-
!scope="col" style="width:3%; text-align:center;"| Year
!scope="col" style="width:3%;text-align:center;"| Photos of winners
!scope="col" style="width:15%;text-align:center;"| Actress
!scope="col" style="width:15%;text-align:center;"| Role(s)
!scope="col" style="width:15%;text-align:center;"| Film
|-
! scope="row" rowspan=2 style="text-align:center" | 1954 <br /><small>[[1st Filmfare Awards|(1st)]] </small>
|rowspan=2 style="text-align:center"|{{border|[[File:Meena Kumari.jpg|95px|centre]]|width=2px|style=solid |color=gold}}
| style="background:#FAEB86;" |'''{{sort|Meena|[[Meena Kumari]]}}''' {{double dagger|alt=Award winner}}
| style="background:#FAEB86;" | '''{{sort|Gauri|Gauri}}'''
| style="background:#FAEB86;" | '''''[[Baiju Bawra (film)|Baiju Bawra]]'''''
|-
| colspan =3 style="text-align:center" | '''No Other Nominee'''
|-
! scope="row" rowspan=2 style="text-align:center" |

### PART I: USE REGEXES TO EXTRACT DATA ITEMS

In [3]:
# Title-case words that are table boilerplate rather than data. They are
# removed from each line first so the <name> regex below cannot match them.
my @exclusions = <Year Photos Actress Role Film Filmfare Awards File No Other Nominee Portrait Returns>;

my regex tcword {       # title case word
    <upper>             # first character uppercase
    <lower>+            # rest of chars lowercase
}

my regex name {
    <tcword>+ %% ' '    # one or more separated by a space (a trailing space is allowed)
}

my regex year {
    \d**4               # four consecutive digits
}

# $current - most recently seen year, carried forward to following lines
# @years   - one entry per extracted name: the year in force when it was found
# @names   - extracted names, in file order
my ( $current, @years, @names );

for @lines -> $line is rw {
    next if $line ~~ /File/;            # avoid getting name from photo links

    # Strip out exclusions. The array interpolates as an alternation of its
    # literal words. Note that this edits the elements of @lines in place.
    $line ~~ s:g/@exclusions//;

    if $line ~~ /<year>/ {
        $current = +$<year>;            # current year is 'sticky'; store it as a number,
                                        # not as a Match object holding the whole line
    }

    if $line ~~ /<name>/ {
        @years.push: $current;
        @names.push: ~$<name>.trim;     # trim drops the trailing space <name> may capture
    }
}
@names[0..10].join("::").say;

Meena::Gauri::Baiju Bawra::Meena::Lalita::Parineeta::Kamini::Biraj::Biraj Bahu::Geeta::Kamala


### PART II: MAP DATA ITEMS TO 2-D ARRAY AND MAKE DATAFRAME 

In [4]:
my @name3 = @names.rotor(3);            # group names in threes: one (Actor, Role, Film) per row
my @year3 = @years.rotor(3);            # same grouping, so index $m lines up with @name3

my @data;                               # 2d array: one [Year, Actor, Role, Film] row per entry

for ^@year3.elems -> $m {               # exclusive upper bound: only indices that exist
    my $year = @year3[$m][0];
    next unless $year;                  # skip groups collected before any year was seen

    # Push rather than assign by index, so a skipped group does not leave an
    # undefined hole in @data.
    @data.push: [ $year, |@name3[$m] ];
}

my @columns = <Year Actor Role Film>;

my \df = DataFrame.new( :@data, :@columns );
say ~df[0..5]^;


    Year  Actor   Role    Film        
 0  1954  Meena   Gauri   Baiju Bawra 
 1  1955  Meena   Lalita  Parineeta   
 2  1956  Kamini  Biraj   Biraj Bahu  
 3  1956  Geeta   Kamala  Vachan      
 4  1956  Meena   Shobha  Azaad       
 5  1957  Nutan   Gauri   Seema       


### PART III: MAKE SOME NEW SERIES AND DATAFRAME TYPES

In [5]:
# Wrapper around a Series that refuses to be built from implausible years.
class YearSeries {
    has Series $.series handles *;      # methods not defined here go to the wrapped Series

    # Construction-time check: every data value must match <year> and lie
    # strictly between 1950 and 2030; otherwise construction dies.
    submethod TWEAK {
        my \entries = $!series.data;

        # The range test is only evaluated when the regex test has passed.
        my $acceptable =  entries.all ~~ /<year>/
                       && 1950 < entries.all < 2030;
        # (looser alternative lower bound, currently unused: 1940)

        die "Data fails to meet YearSeries constraint."
            unless $acceptable;
    }
}

my $year-data = Series.new(data => [1954, 1955, 2020]);
my $yse = YearSeries.new(series => $year-data);
say ~$yse.series;


# Wrapper around a Series intended to hold title-case names.
class NameSeries {
    has Series $.series handles *;      # methods not defined here go to the wrapped Series

    # Construction-time check of the data against <name>.
    # NOTE: the die below is commented out, so the result of the check is
    # currently ignored and construction always succeeds.
    submethod TWEAK {
        my \entries = $!series.data;

        unless entries.all ~~ /<name>/ {
            #die "Data fails to meet NameSeries constraint."
        }
    }
}

my $mse = Series.new(data => <Bob Jane Alice>);
my $zse = NameSeries.new(series => $mse);
say ~$zse.series;

0	1954
1	1955
2	2020
dtype: Int, name: anon

0	Bob
1	Jane
2	Alice
dtype: Str, name: anon



In [6]:
# Wrapper around a DataFrame whose Year, Actor, Role and Film columns are each
# passed through the matching series wrapper when the object is built.
class FilmDataFrame {
    has DataFrame $.dataframe handles *;    # methods not defined here go to the wrapped DataFrame

    # Build one wrapper per column, left to right. A wrapper that rejects its
    # data dies from its own TWEAK; the die here fires if the chain is false.
    submethod TWEAK {
        my \frame = self.dataframe;

        my $valid =  YearSeries.new(series => frame[*]<Year>)
                  && NameSeries.new(series => frame[*]<Actor>)
                  && NameSeries.new(series => frame[*]<Role>)
                  && NameSeries.new(series => frame[*]<Film>);

        die "Data fails to meet FilmDataFrame constraint."
            unless $valid;
    }
}
say 'ok'

ok


### PART IV: PERFORM TYPE CHECKS AT SERIES AND DATAFRAME LEVEL 

In [None]:
# Wrap the extracted DataFrame; construction runs the FilmDataFrame checks.
my FilmDataFrame $fdf = FilmDataFrame.new( dataframe => df );

In [None]:
# Print the entries selected by `* <= 1950` from df[*]<Year> (presumably the
# Year column values - confirm against Dan's subscript semantics). Those are
# the values that would not satisfy the `1950 < .all` bound in YearSeries.
say df[*]<Year>.grep(* <= 1950);