@@ -65,16 +65,106 @@ filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp)
6565filters_8tap_1d_fn2 (avg , 16 , BPC , avx2 , 16b pp )
6666#endif
6767
/*
 * Declare the prototype of a single loop-filter routine named
 * ff_vp9_loop_filter_<dir>_<wd>_<bpp>_<opt>.
 *
 *   dir - direction token pasted into the name (used with h and v)
 *   wd  - filter width token pasted into the name (used with 4, 8, 16)
 *   bpp - bit-depth token pasted into the name
 *   opt - instruction-set suffix pasted into the name (sse2, ssse3, avx)
 *
 * The expansion is a declaration only and carries no trailing semicolon;
 * the invocation supplies it.  The routines themselves are defined outside
 * this file section (presumably in assembly, given the HAVE_YASM guard --
 * confirm against the build).
 */
#define decl_lpf_func(dir, wd, bpp, opt) \
void ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
                                                     int E, int I, int H)
71+
/*
 * Declare one loop-filter prototype per instruction set (sse2, ssse3, avx)
 * for the given direction, width and bit depth, by expanding decl_lpf_func
 * three times.  The last declaration is left unterminated so that the
 * invocation's own semicolon closes it.
 */
#define decl_lpf_funcs(dir, wd, bpp) \
decl_lpf_func(dir, wd, bpp, sse2); \
decl_lpf_func(dir, wd, bpp, ssse3); \
decl_lpf_func(dir, wd, bpp, avx)
76+
/*
 * Declare the loop-filter prototypes for every filter width (4, 8, 16) in
 * one direction, at the bit depth given by BPC.  BPC is not defined in this
 * section; it must be defined by whatever includes this template.
 */
#define decl_lpf_funcs_wd(dir) \
decl_lpf_funcs(dir,  4, BPC); \
decl_lpf_funcs(dir,  8, BPC); \
decl_lpf_funcs(dir, 16, BPC)
81+
/* Prototypes for all h and v loop filters (widths 4/8/16; sse2/ssse3/avx). */
decl_lpf_funcs_wd(h);
decl_lpf_funcs_wd(v);
84+
/*
 * Define a static function loop_filter_<dir>_16_<bpp>_<opt> that calls
 * ff_vp9_loop_filter_<dir>_16_<bpp>_<opt> twice with the same E/I/H
 * arguments: once at dst and once at dst + off.
 *
 *   dir - direction token pasted into both names
 *   off - byte offset added to dst for the second call; the expression is
 *         evaluated inside the generated function, so it may refer to that
 *         function's `stride` parameter.  It is parenthesized in the
 *         expansion so any expression can be passed safely.
 *   bpp - bit-depth token pasted into both names
 *   opt - instruction-set suffix pasted into both names
 */
#define lpf_16_wrapper(dir, off, bpp, opt) \
static void loop_filter_##dir##_16_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
                                                 int E, int I, int H) \
{ \
    ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst,         stride, E, I, H); \
    ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst + (off), stride, E, I, H); \
}
92+
/*
 * Generate both 16-wide wrappers for one bit depth / instruction set:
 * the h wrapper places its second call 8 * stride bytes after dst, the
 * v wrapper places it 16 bytes after dst.
 */
#define lpf_16_wrappers(bpp, opt) \
lpf_16_wrapper(h, 8 * stride, bpp, opt); \
lpf_16_wrapper(v, 16,         bpp, opt)
96+
/* 16-wide h/v wrappers for each supported instruction set. */
lpf_16_wrappers(BPC, sse2);
lpf_16_wrappers(BPC, ssse3);
lpf_16_wrappers(BPC, avx);
100+
/*
 * Define a static function loop_filter_<dir>_<wd1><wd2>_<bpp>_<opt> that
 * applies two filters of possibly different widths back to back:
 *
 *   - ff_vp9_loop_filter_<dir>_<wd1>_... at dst, with the low 8 bits of
 *     E, I and H;
 *   - ff_vp9_loop_filter_<dir>_<wd2>_... at dst + off, with E, I and H
 *     shifted right by 8.
 *
 * `off` is evaluated inside the generated function (so it may refer to the
 * `stride` parameter) and is parenthesized in the expansion.
 */
#define lpf_mix2_wrapper(dir, off, wd1, wd2, bpp, opt) \
static void loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
                                                           int E, int I, int H) \
{ \
    ff_vp9_loop_filter_##dir##_##wd1##_##bpp##_##opt(dst, stride, \
                                                     E & 0xff, I & 0xff, H & 0xff); \
    ff_vp9_loop_filter_##dir##_##wd2##_##bpp##_##opt(dst + (off), stride, \
                                                     E >> 8, I >> 8, H >> 8); \
}
110+
/*
 * Generate the h and v mixed-width wrappers for one (wd1, wd2) pair.
 * The second filter is applied 8 * stride bytes after dst for h and
 * 16 bytes after dst for v.
 */
#define lpf_mix2_wrappers(wd1, wd2, bpp, opt) \
lpf_mix2_wrapper(h, 8 * stride, wd1, wd2, bpp, opt); \
lpf_mix2_wrapper(v, 16,         wd1, wd2, bpp, opt)
114+
/*
 * Generate the mixed-width wrappers for all four width pairs
 * (4,4), (4,8), (8,4) and (8,8) for one bit depth / instruction set.
 *
 * The final line deliberately has neither a semicolon nor a line
 * continuation: the invocation supplies the terminating semicolon, and the
 * macro no longer depends on a blank line following it to end.
 */
#define lpf_mix2_wrappers_set(bpp, opt) \
lpf_mix2_wrappers(4, 4, bpp, opt); \
lpf_mix2_wrappers(4, 8, bpp, opt); \
lpf_mix2_wrappers(8, 4, bpp, opt); \
lpf_mix2_wrappers(8, 8, bpp, opt)

/* Mixed-width h/v wrappers for each supported instruction set. */
lpf_mix2_wrappers_set(BPC, sse2);
lpf_mix2_wrappers_set(BPC, ssse3);
lpf_mix2_wrappers_set(BPC, avx);
68124#endif /* HAVE_YASM */
69125
70126av_cold void INIT_FUNC (VP9DSPContext * dsp )
71127{
72128#if HAVE_YASM
73129 int cpu_flags = av_get_cpu_flags ();
74130
/*
 * Store ff_vp9_loop_filter_<dir>_<wd>_<bpp>_<opt> into
 * dsp->loop_filter_8[idx1][idx2].  Expects a pointer named `dsp` to be in
 * scope at the point of expansion.
 */
#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) \
    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
/*
 * Store the file-local wrapper loop_filter_<dir>_16_<bpp>_<opt> into
 * dsp->loop_filter_16[idx].  Expects a pointer named `dsp` to be in scope
 * at the point of expansion.
 */
#define init_lpf_16_func(idx, dir, bpp, opt) \
    dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt
/*
 * Store the file-local wrapper loop_filter_<dir>_<wd1><wd2>_<bpp>_<opt>
 * into dsp->loop_filter_mix2[idx1][idx2][idx3].  Expects a pointer named
 * `dsp` to be in scope at the point of expansion.
 */
#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) \
    dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt
137+
/*
 * Install every loop-filter entry point for one bit depth / instruction
 * set into the DSP context:
 *
 *   loop_filter_8[w][d]       w: 0 = width 4, 1 = width 8, 2 = width 16
 *                             d: 0 = h, 1 = v
 *   loop_filter_16[d]         d: 0 = h, 1 = v
 *   loop_filter_mix2[a][b][d] a: first width  (0 = 4, 1 = 8)
 *                             b: second width (0 = 4, 1 = 8)
 *                             d: 0 = h, 1 = v
 *
 * The body is wrapped in do { } while (0) so that the whole group behaves
 * as a single statement, e.g. under an unbraced `if`.
 */
#define init_lpf_funcs(bpp, opt) \
    do { \
        init_lpf_8_func(0, 0, h,  4, bpp, opt); \
        init_lpf_8_func(0, 1, v,  4, bpp, opt); \
        init_lpf_8_func(1, 0, h,  8, bpp, opt); \
        init_lpf_8_func(1, 1, v,  8, bpp, opt); \
        init_lpf_8_func(2, 0, h, 16, bpp, opt); \
        init_lpf_8_func(2, 1, v, 16, bpp, opt); \
        init_lpf_16_func(0, h, bpp, opt); \
        init_lpf_16_func(1, v, bpp, opt); \
        init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \
        init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \
        init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \
        init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \
        init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \
        init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \
        init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
        init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt); \
    } while (0)
155+
75156 if (EXTERNAL_SSE2 (cpu_flags )) {
76157 init_subpel3 (0 , put , BPC , sse2 );
77158 init_subpel3 (1 , avg , BPC , sse2 );
159+ init_lpf_funcs (BPC , sse2 );
160+ }
161+
162+ if (EXTERNAL_SSSE3 (cpu_flags )) {
163+ init_lpf_funcs (BPC , ssse3 );
164+ }
165+
166+ if (EXTERNAL_AVX (cpu_flags )) {
167+ init_lpf_funcs (BPC , avx );
78168 }
79169
80170 if (EXTERNAL_AVX2 (cpu_flags )) {
0 commit comments